1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/slab.h>
6 #include <linux/net.h>
7 #include <linux/compat.h>
8 #include <net/compat.h>
9 #include <linux/io_uring.h>
10
11 #include <uapi/linux/io_uring.h>
12
13 #include "io_uring.h"
14 #include "kbuf.h"
15 #include "alloc_cache.h"
16 #include "net.h"
17 #include "notif.h"
18 #include "rsrc.h"
19 #include "zcrx.h"
20
21 struct io_shutdown {
22 struct file *file;
23 int how;
24 };
25
26 struct io_accept {
27 struct file *file;
28 struct sockaddr __user *addr;
29 int __user *addr_len;
30 int flags;
31 int iou_flags;
32 u32 file_slot;
33 unsigned long nofile;
34 };
35
36 struct io_socket {
37 struct file *file;
38 int domain;
39 int type;
40 int protocol;
41 int flags;
42 u32 file_slot;
43 unsigned long nofile;
44 };
45
46 struct io_connect {
47 struct file *file;
48 struct sockaddr __user *addr;
49 int addr_len;
50 bool in_progress;
51 bool seen_econnaborted;
52 };
53
54 struct io_bind {
55 struct file *file;
56 int addr_len;
57 };
58
59 struct io_listen {
60 struct file *file;
61 int backlog;
62 };
63
64 struct io_sr_msg {
65 struct file *file;
66 union {
67 struct compat_msghdr __user *umsg_compat;
68 struct user_msghdr __user *umsg;
69 void __user *buf;
70 };
71 int len;
72 unsigned done_io;
73 unsigned msg_flags;
74 unsigned nr_multishot_loops;
75 u16 flags;
76 /* initialised and used only by !msg send variants */
77 u16 buf_group;
78 unsigned short retry_flags;
79 void __user *msg_control;
80 /* used only for send zerocopy */
81 struct io_kiocb *notif;
82 };
83
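/*
 * IO_SR_MSG_RETRY is set by io_recv_finish() when a bundle receive did a
 * full transfer and more data is queued, so the next pass appends to the
 * same result and re-uses the stashed CQE flags. IO_SR_MSG_PARTIAL_MAP is
 * set by io_recv_buf_select() when io_buffers_peek() could only partially
 * map the selected buffers; any retry flag blocks the append-retry.
 */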
84 enum sr_retry_flags {
85 IO_SR_MSG_RETRY = 1,
86 IO_SR_MSG_PARTIAL_MAP = 2,
87 };
88
89 /*
90  * Number of times we'll try to do receives if there's more data. If we
91  * exceed this limit, then add us to the back of the queue and retry from
92  * there. This helps maintain fairness between clients that flood us with data.
93 */
94 #define MULTISHOT_MAX_RETRY 32
95
96 struct io_recvzc {
97 struct file *file;
98 unsigned msg_flags;
99 u16 flags;
100 u32 len;
101 struct io_zcrx_ifq *ifq;
102 };
103
104 static int io_sg_from_iter_iovec(struct sk_buff *skb,
105 struct iov_iter *from, size_t length);
106 static int io_sg_from_iter(struct sk_buff *skb,
107 struct iov_iter *from, size_t length);
108
109 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
110 {
111 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
112
113 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
114 sqe->buf_index || sqe->splice_fd_in))
115 return -EINVAL;
116
117 shutdown->how = READ_ONCE(sqe->len);
118 req->flags |= REQ_F_FORCE_ASYNC;
119 return 0;
120 }
121
122 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
123 {
124 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
125 struct socket *sock;
126 int ret;
127
128 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
129
130 sock = sock_from_file(req->file);
131 if (unlikely(!sock))
132 return -ENOTSOCK;
133
134 ret = __sys_shutdown_sock(sock, shutdown->how);
135 io_req_set_res(req, ret, 0);
136 return IOU_COMPLETE;
137 }
138
139 static bool io_net_retry(struct socket *sock, int flags)
140 {
141 if (!(flags & MSG_WAITALL))
142 return false;
143 return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
144 }
145
146 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
147 {
148 if (kmsg->vec.iovec)
149 io_vec_free(&kmsg->vec);
150 }
151
152 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
153 {
154 struct io_async_msghdr *hdr = req->async_data;
155
156 /* can't recycle, ensure we free the iovec if we have one */
157 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
158 io_netmsg_iovec_free(hdr);
159 return;
160 }
161
162 /* Let normal cleanup path reap it if we fail adding to the cache */
163 io_alloc_cache_vec_kasan(&hdr->vec);
164 if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
165 io_vec_free(&hdr->vec);
166
167 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
168 req->async_data = NULL;
169 req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP);
170 }
171 }
172
173 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
174 {
175 struct io_ring_ctx *ctx = req->ctx;
176 struct io_async_msghdr *hdr;
177
178 hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
179 if (!hdr)
180 return NULL;
181
182 /* If the async data was cached, we might have an iov cached inside. */
183 if (hdr->vec.iovec)
184 req->flags |= REQ_F_NEED_CLEANUP;
185 return hdr;
186 }
187
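/*
 * Reset per-iteration state before arming the next multishot/bundle pass:
 * buffer accounting starts fresh, and a zero length lets the next selected
 * buffer dictate the transfer size.
 */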
188 static inline void io_mshot_prep_retry(struct io_kiocb *req,
189 struct io_async_msghdr *kmsg)
190 {
191 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
192
193 req->flags &= ~REQ_F_BL_EMPTY;
194 sr->done_io = 0;
195 sr->retry_flags = 0;
196 sr->len = 0; /* get from the provided buffer */
197 }
198
199 static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
200 const struct iovec __user *uiov, unsigned uvec_seg,
201 int ddir)
202 {
203 struct iovec *iov;
204 int ret, nr_segs;
205
206 if (iomsg->vec.iovec) {
207 nr_segs = iomsg->vec.nr;
208 iov = iomsg->vec.iovec;
209 } else {
210 nr_segs = 1;
211 iov = &iomsg->fast_iov;
212 }
213
214 ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
215 &iomsg->msg.msg_iter, io_is_compat(req->ctx));
216 if (unlikely(ret < 0))
217 return ret;
218
219 if (iov) {
220 req->flags |= REQ_F_NEED_CLEANUP;
221 io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
222 }
223 return 0;
224 }
225
226 static int io_compat_msg_copy_hdr(struct io_kiocb *req,
227 struct io_async_msghdr *iomsg,
228 struct compat_msghdr *msg, int ddir,
229 struct sockaddr __user **save_addr)
230 {
231 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
232 struct compat_iovec __user *uiov;
233 int ret;
234
235 if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
236 return -EFAULT;
237
238 ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
239 if (ret)
240 return ret;
241
242 uiov = compat_ptr(msg->msg_iov);
243 if (req->flags & REQ_F_BUFFER_SELECT) {
244 if (msg->msg_iovlen == 0) {
245 sr->len = 0;
246 } else if (msg->msg_iovlen > 1) {
247 return -EINVAL;
248 } else {
249 struct compat_iovec tmp_iov;
250
251 if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
252 return -EFAULT;
253 sr->len = tmp_iov.iov_len;
254 }
255 }
256 return 0;
257 }
258
259 static int io_copy_msghdr_from_user(struct user_msghdr *msg,
260 struct user_msghdr __user *umsg)
261 {
262 if (!user_access_begin(umsg, sizeof(*umsg)))
263 return -EFAULT;
264 unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
265 unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
266 unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
267 unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
268 unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
269 unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
270 user_access_end();
271 return 0;
272 ua_end:
273 user_access_end();
274 return -EFAULT;
275 }
276
277 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
278 struct user_msghdr *msg, int ddir,
279 struct sockaddr __user **save_addr)
280 {
281 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
282 struct user_msghdr __user *umsg = sr->umsg;
283 int ret;
284
285 iomsg->msg.msg_name = &iomsg->addr;
286 iomsg->msg.msg_iter.nr_segs = 0;
287
288 if (io_is_compat(req->ctx)) {
289 struct compat_msghdr cmsg;
290
291 ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
292 if (ret)
293 return ret;
294
295 memset(msg, 0, sizeof(*msg));
296 msg->msg_namelen = cmsg.msg_namelen;
297 msg->msg_controllen = cmsg.msg_controllen;
298 msg->msg_iov = compat_ptr(cmsg.msg_iov);
299 msg->msg_iovlen = cmsg.msg_iovlen;
300 return 0;
301 }
302
303 ret = io_copy_msghdr_from_user(msg, umsg);
304 if (unlikely(ret))
305 return ret;
306
307 msg->msg_flags = 0;
308
309 ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
310 if (ret)
311 return ret;
312
313 if (req->flags & REQ_F_BUFFER_SELECT) {
314 if (msg->msg_iovlen == 0) {
315 sr->len = 0;
316 } else if (msg->msg_iovlen > 1) {
317 return -EINVAL;
318 } else {
319 struct iovec __user *uiov = msg->msg_iov;
320 struct iovec tmp_iov;
321
322 if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
323 return -EFAULT;
324 sr->len = tmp_iov.iov_len;
325 }
326 }
327 return 0;
328 }
329
330 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
331 {
332 struct io_async_msghdr *io = req->async_data;
333
334 io_netmsg_iovec_free(io);
335 }
336
337 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
338 {
339 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
340 struct io_async_msghdr *kmsg = req->async_data;
341 void __user *addr;
342 u16 addr_len;
343 int ret;
344
345 sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
346
347 if (READ_ONCE(sqe->__pad3[0]))
348 return -EINVAL;
349
350 kmsg->msg.msg_name = NULL;
351 kmsg->msg.msg_namelen = 0;
352 kmsg->msg.msg_control = NULL;
353 kmsg->msg.msg_controllen = 0;
354 kmsg->msg.msg_ubuf = NULL;
355
356 addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
357 addr_len = READ_ONCE(sqe->addr_len);
358 if (addr) {
359 ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
360 if (unlikely(ret < 0))
361 return ret;
362 kmsg->msg.msg_name = &kmsg->addr;
363 kmsg->msg.msg_namelen = addr_len;
364 }
365 if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
366 req->flags |= REQ_F_IMPORT_BUFFER;
367 return 0;
368 }
369 if (req->flags & REQ_F_BUFFER_SELECT)
370 return 0;
371 return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
372 }
373
374 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
375 {
376 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
377 struct io_async_msghdr *kmsg = req->async_data;
378 struct user_msghdr msg;
379 int ret;
380
381 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
382 ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
383 if (unlikely(ret))
384 return ret;
385 /* save msg_control as sys_sendmsg() overwrites it */
386 sr->msg_control = kmsg->msg.msg_control_user;
387
388 if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
389 kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
390 return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
391 msg.msg_iovlen);
392 }
393 if (req->flags & REQ_F_BUFFER_SELECT)
394 return 0;
395 return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
396 }
397
398 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
399
400 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
401 {
402 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
403
404 sr->done_io = 0;
405 sr->retry_flags = 0;
406 sr->len = READ_ONCE(sqe->len);
407 sr->flags = READ_ONCE(sqe->ioprio);
408 if (sr->flags & ~SENDMSG_FLAGS)
409 return -EINVAL;
410 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
411 if (sr->msg_flags & MSG_DONTWAIT)
412 req->flags |= REQ_F_NOWAIT;
413 if (req->flags & REQ_F_BUFFER_SELECT)
414 sr->buf_group = req->buf_index;
415 if (sr->flags & IORING_RECVSEND_BUNDLE) {
416 if (req->opcode == IORING_OP_SENDMSG)
417 return -EINVAL;
418 sr->msg_flags |= MSG_WAITALL;
419 req->buf_list = NULL;
420 req->flags |= REQ_F_MULTISHOT;
421 }
422
423 if (io_is_compat(req->ctx))
424 sr->msg_flags |= MSG_CMSG_COMPAT;
425
426 if (unlikely(!io_msg_alloc_async(req)))
427 return -ENOMEM;
428 if (req->opcode != IORING_OP_SENDMSG)
429 return io_send_setup(req, sqe);
430 if (unlikely(sqe->addr2 || sqe->file_index))
431 return -EINVAL;
432 return io_sendmsg_setup(req, sqe);
433 }
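/*
 * A minimal userspace sketch (not upstream code) of a bundle send that
 * passes the checks above, assuming a provided buffer ring registered as
 * group 0 holds the data to send:
 *
 *	sqe->opcode = IORING_OP_SEND;
 *	sqe->fd = sockfd;
 *	sqe->ioprio = IORING_RECVSEND_BUNDLE;	// becomes sr->flags
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = 0;			// becomes sr->buf_group
 *	sqe->len = 0;				// cap comes from the buffers
 */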
434
435 static void io_req_msg_cleanup(struct io_kiocb *req,
436 unsigned int issue_flags)
437 {
438 io_netmsg_recycle(req, issue_flags);
439 }
440
441 /*
442 * For bundle completions, we need to figure out how many segments we consumed.
443 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
444  * could be using an ITER_IOVEC. If the latter, and we consumed all of
445  * the segments, then it's a trivial question to answer. If we have residual
446 * data in the iter, then loop the segments to figure out how much we
447 * transferred.
448 */
449 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
450 {
451 struct iovec *iov;
452 int nbufs;
453
454 /* no data is always zero segments, and a ubuf is always 1 segment */
455 if (ret <= 0)
456 return 0;
457 if (iter_is_ubuf(&kmsg->msg.msg_iter))
458 return 1;
459
460 iov = kmsg->vec.iovec;
461 if (!iov)
462 iov = &kmsg->fast_iov;
463
464 /* if all data was transferred, it's basic pointer math */
465 if (!iov_iter_count(&kmsg->msg.msg_iter))
466 return iter_iov(&kmsg->msg.msg_iter) - iov;
467
468 /* short transfer, count segments */
469 nbufs = 0;
470 do {
471 int this_len = min_t(int, iov[nbufs].iov_len, ret);
472
473 nbufs++;
474 ret -= this_len;
475 } while (ret);
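	/*
	 * Example: with three 256-byte iovecs and a short transfer of
	 * ret == 300, the loop counts 256 bytes against the first segment
	 * and the remaining 44 against the second, so nbufs ends up 2.
	 */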
476
477 return nbufs;
478 }
479
480 static inline bool io_send_finish(struct io_kiocb *req, int *ret,
481 struct io_async_msghdr *kmsg,
482 unsigned issue_flags)
483 {
484 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
485 bool bundle_finished = *ret <= 0;
486 unsigned int cflags;
487
488 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
489 cflags = io_put_kbuf(req, *ret, issue_flags);
490 goto finish;
491 }
492
493 cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);
494
495 if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
496 goto finish;
497
498 /*
499  * Fill CQE for this send and see if we should keep trying to
500  * send to this socket.
501 */
502 if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
503 io_mshot_prep_retry(req, kmsg);
504 return false;
505 }
506
507 /* Otherwise stop bundle and use the current result. */
508 finish:
509 io_req_set_res(req, *ret, cflags);
510 *ret = IOU_COMPLETE;
511 return true;
512 }
513
514 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
515 {
516 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
517 struct io_async_msghdr *kmsg = req->async_data;
518 struct socket *sock;
519 unsigned flags;
520 int min_ret = 0;
521 int ret;
522
523 sock = sock_from_file(req->file);
524 if (unlikely(!sock))
525 return -ENOTSOCK;
526
527 if (!(req->flags & REQ_F_POLLED) &&
528 (sr->flags & IORING_RECVSEND_POLL_FIRST))
529 return -EAGAIN;
530
531 flags = sr->msg_flags;
532 if (issue_flags & IO_URING_F_NONBLOCK)
533 flags |= MSG_DONTWAIT;
534 if (flags & MSG_WAITALL)
535 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
536
537 kmsg->msg.msg_control_user = sr->msg_control;
538
539 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
540
541 if (ret < min_ret) {
542 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
543 return -EAGAIN;
544 if (ret > 0 && io_net_retry(sock, flags)) {
545 kmsg->msg.msg_controllen = 0;
546 kmsg->msg.msg_control = NULL;
547 sr->done_io += ret;
548 req->flags |= REQ_F_BL_NO_RECYCLE;
549 return -EAGAIN;
550 }
551 if (ret == -ERESTARTSYS)
552 ret = -EINTR;
553 req_set_fail(req);
554 }
555 io_req_msg_cleanup(req, issue_flags);
556 if (ret >= 0)
557 ret += sr->done_io;
558 else if (sr->done_io)
559 ret = sr->done_io;
560 io_req_set_res(req, ret, 0);
561 return IOU_COMPLETE;
562 }
563
564 static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
565 struct io_async_msghdr *kmsg)
566 {
567 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
568
569 int ret;
570 struct buf_sel_arg arg = {
571 .iovs = &kmsg->fast_iov,
572 .max_len = min_not_zero(sr->len, INT_MAX),
573 .nr_iovs = 1,
574 .buf_group = sr->buf_group,
575 };
576
577 if (kmsg->vec.iovec) {
578 arg.nr_iovs = kmsg->vec.nr;
579 arg.iovs = kmsg->vec.iovec;
580 arg.mode = KBUF_MODE_FREE;
581 }
582
583 if (!(sr->flags & IORING_RECVSEND_BUNDLE))
584 arg.nr_iovs = 1;
585 else
586 arg.mode |= KBUF_MODE_EXPAND;
587
588 ret = io_buffers_select(req, &arg, issue_flags);
589 if (unlikely(ret < 0))
590 return ret;
591
592 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
593 kmsg->vec.nr = ret;
594 kmsg->vec.iovec = arg.iovs;
595 req->flags |= REQ_F_NEED_CLEANUP;
596 }
597 sr->len = arg.out_len;
598
599 if (ret == 1) {
600 sr->buf = arg.iovs[0].iov_base;
601 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
602 &kmsg->msg.msg_iter);
603 if (unlikely(ret))
604 return ret;
605 } else {
606 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
607 arg.iovs, ret, arg.out_len);
608 }
609
610 return 0;
611 }
612
613 int io_send(struct io_kiocb *req, unsigned int issue_flags)
614 {
615 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
616 struct io_async_msghdr *kmsg = req->async_data;
617 struct socket *sock;
618 unsigned flags;
619 int min_ret = 0;
620 int ret;
621
622 sock = sock_from_file(req->file);
623 if (unlikely(!sock))
624 return -ENOTSOCK;
625
626 if (!(req->flags & REQ_F_POLLED) &&
627 (sr->flags & IORING_RECVSEND_POLL_FIRST))
628 return -EAGAIN;
629
630 flags = sr->msg_flags;
631 if (issue_flags & IO_URING_F_NONBLOCK)
632 flags |= MSG_DONTWAIT;
633
634 retry_bundle:
635 if (io_do_buffer_select(req)) {
636 ret = io_send_select_buffer(req, issue_flags, kmsg);
637 if (ret)
638 return ret;
639 }
640
641 /*
642 * If MSG_WAITALL is set, or this is a bundle send, then we need
643  * the full amount. If just the bundle flag is set and we do a short send,
644 * then we complete the bundle sequence rather than continue on.
645 */
646 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
647 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
648
649 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
650 kmsg->msg.msg_flags = flags;
651 ret = sock_sendmsg(sock, &kmsg->msg);
652 if (ret < min_ret) {
653 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
654 return -EAGAIN;
655
656 if (ret > 0 && io_net_retry(sock, flags)) {
657 sr->len -= ret;
658 sr->buf += ret;
659 sr->done_io += ret;
660 req->flags |= REQ_F_BL_NO_RECYCLE;
661 return -EAGAIN;
662 }
663 if (ret == -ERESTARTSYS)
664 ret = -EINTR;
665 req_set_fail(req);
666 }
667 if (ret >= 0)
668 ret += sr->done_io;
669 else if (sr->done_io)
670 ret = sr->done_io;
671
672 if (!io_send_finish(req, &ret, kmsg, issue_flags))
673 goto retry_bundle;
674
675 io_req_msg_cleanup(req, issue_flags);
676 return ret;
677 }
678
679 static int io_recvmsg_mshot_prep(struct io_kiocb *req,
680 struct io_async_msghdr *iomsg,
681 int namelen, size_t controllen)
682 {
683 if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
684 (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
685 int hdr;
686
687 if (unlikely(namelen < 0))
688 return -EOVERFLOW;
689 if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
690 namelen, &hdr))
691 return -EOVERFLOW;
692 if (check_add_overflow(hdr, controllen, &hdr))
693 return -EOVERFLOW;
694
695 iomsg->namelen = namelen;
696 iomsg->controllen = controllen;
697 return 0;
698 }
699
700 return 0;
701 }
702
703 static int io_recvmsg_copy_hdr(struct io_kiocb *req,
704 struct io_async_msghdr *iomsg)
705 {
706 struct user_msghdr msg;
707 int ret;
708
709 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
710 if (unlikely(ret))
711 return ret;
712
713 if (!(req->flags & REQ_F_BUFFER_SELECT)) {
714 ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
715 ITER_DEST);
716 if (unlikely(ret))
717 return ret;
718 }
719 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
720 msg.msg_controllen);
721 }
722
723 static int io_recvmsg_prep_setup(struct io_kiocb *req)
724 {
725 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
726 struct io_async_msghdr *kmsg;
727
728 kmsg = io_msg_alloc_async(req);
729 if (unlikely(!kmsg))
730 return -ENOMEM;
731
732 if (req->opcode == IORING_OP_RECV) {
733 kmsg->msg.msg_name = NULL;
734 kmsg->msg.msg_namelen = 0;
735 kmsg->msg.msg_inq = 0;
736 kmsg->msg.msg_control = NULL;
737 kmsg->msg.msg_get_inq = 1;
738 kmsg->msg.msg_controllen = 0;
739 kmsg->msg.msg_iocb = NULL;
740 kmsg->msg.msg_ubuf = NULL;
741
742 if (req->flags & REQ_F_BUFFER_SELECT)
743 return 0;
744 return import_ubuf(ITER_DEST, sr->buf, sr->len,
745 &kmsg->msg.msg_iter);
746 }
747
748 return io_recvmsg_copy_hdr(req, kmsg);
749 }
750
751 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
752 IORING_RECVSEND_BUNDLE)
753
754 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
755 {
756 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
757
758 sr->done_io = 0;
759 sr->retry_flags = 0;
760
761 if (unlikely(sqe->file_index || sqe->addr2))
762 return -EINVAL;
763
764 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
765 sr->len = READ_ONCE(sqe->len);
766 sr->flags = READ_ONCE(sqe->ioprio);
767 if (sr->flags & ~RECVMSG_FLAGS)
768 return -EINVAL;
769 sr->msg_flags = READ_ONCE(sqe->msg_flags);
770 if (sr->msg_flags & MSG_DONTWAIT)
771 req->flags |= REQ_F_NOWAIT;
772 if (sr->msg_flags & MSG_ERRQUEUE)
773 req->flags |= REQ_F_CLEAR_POLLIN;
774 if (req->flags & REQ_F_BUFFER_SELECT) {
775 /*
776 * Store the buffer group for this multishot receive separately,
777 * as if we end up doing an io-wq based issue that selects a
778 * buffer, it has to be committed immediately and that will
779 * clear ->buf_list. This means we lose the link to the buffer
780 * list, and the eventual buffer put on completion then cannot
781 * restore it.
782 */
783 sr->buf_group = req->buf_index;
784 req->buf_list = NULL;
785 }
786 if (sr->flags & IORING_RECV_MULTISHOT) {
787 if (!(req->flags & REQ_F_BUFFER_SELECT))
788 return -EINVAL;
789 if (sr->msg_flags & MSG_WAITALL)
790 return -EINVAL;
791 if (req->opcode == IORING_OP_RECV && sr->len)
792 return -EINVAL;
793 req->flags |= REQ_F_APOLL_MULTISHOT;
794 }
795 if (sr->flags & IORING_RECVSEND_BUNDLE) {
796 if (req->opcode == IORING_OP_RECVMSG)
797 return -EINVAL;
798 }
799
800 if (io_is_compat(req->ctx))
801 sr->msg_flags |= MSG_CMSG_COMPAT;
802
803 sr->nr_multishot_loops = 0;
804 return io_recvmsg_prep_setup(req);
805 }
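/*
 * A minimal userspace sketch (not upstream code) of a multishot receive
 * that satisfies the validation above: buffer select is mandatory,
 * MSG_WAITALL is rejected, and for IORING_OP_RECV the length must be 0 so
 * each completion is sized by the buffer it selects:
 *
 *	sqe->opcode = IORING_OP_RECV;
 *	sqe->fd = sockfd;
 *	sqe->ioprio = IORING_RECV_MULTISHOT;	// becomes sr->flags
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = bgid;
 *	sqe->len = 0;
 */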
806
807 /* bits to clear in old and inherit in new cflags on bundle retry */
808 #define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
809
810 /*
811 * Finishes io_recv and io_recvmsg.
812 *
813 * Returns true if it is actually finished, or false if it should run
814 * again (for multishot).
815 */
816 static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
817 struct io_async_msghdr *kmsg,
818 bool mshot_finished, unsigned issue_flags)
819 {
820 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
821 unsigned int cflags = 0;
822
823 if (kmsg->msg.msg_inq > 0)
824 cflags |= IORING_CQE_F_SOCK_NONEMPTY;
825
826 if (sr->flags & IORING_RECVSEND_BUNDLE) {
827 size_t this_ret = *ret - sr->done_io;
828
829 cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
830 issue_flags);
831 if (sr->retry_flags & IO_SR_MSG_RETRY)
832 cflags = req->cqe.flags | (cflags & CQE_F_MASK);
833 /* bundle with no more immediate buffers, we're done */
834 if (req->flags & REQ_F_BL_EMPTY)
835 goto finish;
836 /*
837 * If more is available AND it was a full transfer, retry and
838 * append to this one
839 */
840 if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
841 !iov_iter_count(&kmsg->msg.msg_iter)) {
842 req->cqe.flags = cflags & ~CQE_F_MASK;
843 sr->len = kmsg->msg.msg_inq;
844 sr->done_io += this_ret;
845 sr->retry_flags |= IO_SR_MSG_RETRY;
846 return false;
847 }
848 } else {
849 cflags |= io_put_kbuf(req, *ret, issue_flags);
850 }
851
852 /*
853 * Fill CQE for this receive and see if we should keep trying to
854 * receive from this socket.
855 */
856 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
857 io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
858 *ret = IOU_RETRY;
859 io_mshot_prep_retry(req, kmsg);
860 /* Known not-empty or unknown state, retry */
861 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
862 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
863 return false;
864 /* mshot retries exceeded, force a requeue */
865 sr->nr_multishot_loops = 0;
866 if (issue_flags & IO_URING_F_MULTISHOT)
867 *ret = IOU_REQUEUE;
868 }
869 return true;
870 }
871
872 /* Finish the request / stop multishot. */
873 finish:
874 io_req_set_res(req, *ret, cflags);
875 *ret = IOU_COMPLETE;
876 io_req_msg_cleanup(req, issue_flags);
877 return true;
878 }
879
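/*
 * For multishot recvmsg the selected buffer is laid out as:
 *
 *	struct io_uring_recvmsg_out | name (namelen) | control (controllen) | payload
 *
 * io_recvmsg_prep_multishot() reserves the header region up front and
 * points msg_control_user at its tail; io_recvmsg_multishot() later copies
 * the header and the received name back to the start of the buffer.
 */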
880 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
881 struct io_sr_msg *sr, void __user **buf,
882 size_t *len)
883 {
884 unsigned long ubuf = (unsigned long) *buf;
885 unsigned long hdr;
886
887 hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
888 kmsg->controllen;
889 if (*len < hdr)
890 return -EFAULT;
891
892 if (kmsg->controllen) {
893 unsigned long control = ubuf + hdr - kmsg->controllen;
894
895 kmsg->msg.msg_control_user = (void __user *) control;
896 kmsg->msg.msg_controllen = kmsg->controllen;
897 }
898
899 sr->buf = *buf; /* stash for later copy */
900 *buf = (void __user *) (ubuf + hdr);
901 kmsg->payloadlen = *len = *len - hdr;
902 return 0;
903 }
904
905 struct io_recvmsg_multishot_hdr {
906 struct io_uring_recvmsg_out msg;
907 struct sockaddr_storage addr;
908 };
909
910 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
911 struct io_async_msghdr *kmsg,
912 unsigned int flags, bool *finished)
913 {
914 int err;
915 int copy_len;
916 struct io_recvmsg_multishot_hdr hdr;
917
918 if (kmsg->namelen)
919 kmsg->msg.msg_name = &hdr.addr;
920 kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
921 kmsg->msg.msg_namelen = 0;
922
923 if (sock->file->f_flags & O_NONBLOCK)
924 flags |= MSG_DONTWAIT;
925
926 err = sock_recvmsg(sock, &kmsg->msg, flags);
927 *finished = err <= 0;
928 if (err < 0)
929 return err;
930
931 hdr.msg = (struct io_uring_recvmsg_out) {
932 .controllen = kmsg->controllen - kmsg->msg.msg_controllen,
933 .flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
934 };
935
936 hdr.msg.payloadlen = err;
937 if (err > kmsg->payloadlen)
938 err = kmsg->payloadlen;
939
940 copy_len = sizeof(struct io_uring_recvmsg_out);
941 if (kmsg->msg.msg_namelen > kmsg->namelen)
942 copy_len += kmsg->namelen;
943 else
944 copy_len += kmsg->msg.msg_namelen;
945
946 /*
947 * "fromlen shall refer to the value before truncation.."
948 * 1003.1g
949 */
950 hdr.msg.namelen = kmsg->msg.msg_namelen;
951
952 /* ensure that there is no gap between hdr and sockaddr_storage */
953 BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
954 sizeof(struct io_uring_recvmsg_out));
955 if (copy_to_user(io->buf, &hdr, copy_len)) {
956 *finished = true;
957 return -EFAULT;
958 }
959
960 return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
961 kmsg->controllen + err;
962 }
963
964 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
965 {
966 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
967 struct io_async_msghdr *kmsg = req->async_data;
968 struct socket *sock;
969 unsigned flags;
970 int ret, min_ret = 0;
971 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
972 bool mshot_finished = true;
973
974 sock = sock_from_file(req->file);
975 if (unlikely(!sock))
976 return -ENOTSOCK;
977
978 if (!(req->flags & REQ_F_POLLED) &&
979 (sr->flags & IORING_RECVSEND_POLL_FIRST))
980 return -EAGAIN;
981
982 flags = sr->msg_flags;
983 if (force_nonblock)
984 flags |= MSG_DONTWAIT;
985
986 retry_multishot:
987 if (io_do_buffer_select(req)) {
988 void __user *buf;
989 size_t len = sr->len;
990
991 buf = io_buffer_select(req, &len, sr->buf_group, issue_flags);
992 if (!buf)
993 return -ENOBUFS;
994
995 if (req->flags & REQ_F_APOLL_MULTISHOT) {
996 ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
997 if (ret) {
998 io_kbuf_recycle(req, issue_flags);
999 return ret;
1000 }
1001 }
1002
1003 iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
1004 }
1005
1006 kmsg->msg.msg_get_inq = 1;
1007 kmsg->msg.msg_inq = -1;
1008 if (req->flags & REQ_F_APOLL_MULTISHOT) {
1009 ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
1010 &mshot_finished);
1011 } else {
1012 /* disable partial retry for recvmsg with cmsg attached */
1013 if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
1014 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1015
1016 ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
1017 kmsg->uaddr, flags);
1018 }
1019
1020 if (ret < min_ret) {
1021 if (ret == -EAGAIN && force_nonblock) {
1022 if (issue_flags & IO_URING_F_MULTISHOT)
1023 io_kbuf_recycle(req, issue_flags);
1024
1025 return IOU_RETRY;
1026 }
1027 if (ret > 0 && io_net_retry(sock, flags)) {
1028 sr->done_io += ret;
1029 req->flags |= REQ_F_BL_NO_RECYCLE;
1030 return IOU_RETRY;
1031 }
1032 if (ret == -ERESTARTSYS)
1033 ret = -EINTR;
1034 req_set_fail(req);
1035 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1036 req_set_fail(req);
1037 }
1038
1039 if (ret > 0)
1040 ret += sr->done_io;
1041 else if (sr->done_io)
1042 ret = sr->done_io;
1043 else
1044 io_kbuf_recycle(req, issue_flags);
1045
1046 if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
1047 goto retry_multishot;
1048
1049 return ret;
1050 }
1051
1052 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
1053 size_t *len, unsigned int issue_flags)
1054 {
1055 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1056 int ret;
1057
1058 /*
1059 * If the ring isn't locked, then don't use the peek interface
1060 * to grab multiple buffers as we will lock/unlock between
1061 * this selection and posting the buffers.
1062 */
1063 if (!(issue_flags & IO_URING_F_UNLOCKED) &&
1064 sr->flags & IORING_RECVSEND_BUNDLE) {
1065 struct buf_sel_arg arg = {
1066 .iovs = &kmsg->fast_iov,
1067 .nr_iovs = 1,
1068 .mode = KBUF_MODE_EXPAND,
1069 .buf_group = sr->buf_group,
1070 };
1071
1072 if (kmsg->vec.iovec) {
1073 arg.nr_iovs = kmsg->vec.nr;
1074 arg.iovs = kmsg->vec.iovec;
1075 arg.mode |= KBUF_MODE_FREE;
1076 }
1077
1078 if (kmsg->msg.msg_inq > 1)
1079 arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
1080
1081 ret = io_buffers_peek(req, &arg);
1082 if (unlikely(ret < 0))
1083 return ret;
1084
1085 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
1086 kmsg->vec.nr = ret;
1087 kmsg->vec.iovec = arg.iovs;
1088 req->flags |= REQ_F_NEED_CLEANUP;
1089 }
1090 if (arg.partial_map)
1091 sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
1092
1093 /* special case 1 vec, can be a fast path */
1094 if (ret == 1) {
1095 sr->buf = arg.iovs[0].iov_base;
1096 sr->len = arg.iovs[0].iov_len;
1097 goto map_ubuf;
1098 }
1099 iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
1100 arg.out_len);
1101 } else {
1102 void __user *buf;
1103
1104 *len = sr->len;
1105 buf = io_buffer_select(req, len, sr->buf_group, issue_flags);
1106 if (!buf)
1107 return -ENOBUFS;
1108 sr->buf = buf;
1109 sr->len = *len;
1110 map_ubuf:
1111 ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
1112 &kmsg->msg.msg_iter);
1113 if (unlikely(ret))
1114 return ret;
1115 }
1116
1117 return 0;
1118 }
1119
1120 int io_recv(struct io_kiocb *req, unsigned int issue_flags)
1121 {
1122 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1123 struct io_async_msghdr *kmsg = req->async_data;
1124 struct socket *sock;
1125 unsigned flags;
1126 int ret, min_ret = 0;
1127 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1128 size_t len = sr->len;
1129 bool mshot_finished;
1130
1131 if (!(req->flags & REQ_F_POLLED) &&
1132 (sr->flags & IORING_RECVSEND_POLL_FIRST))
1133 return -EAGAIN;
1134
1135 sock = sock_from_file(req->file);
1136 if (unlikely(!sock))
1137 return -ENOTSOCK;
1138
1139 flags = sr->msg_flags;
1140 if (force_nonblock)
1141 flags |= MSG_DONTWAIT;
1142
1143 retry_multishot:
1144 if (io_do_buffer_select(req)) {
1145 ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
1146 if (unlikely(ret)) {
1147 kmsg->msg.msg_inq = -1;
1148 goto out_free;
1149 }
1150 sr->buf = NULL;
1151 }
1152
1153 kmsg->msg.msg_flags = 0;
1154 kmsg->msg.msg_inq = -1;
1155
1156 if (flags & MSG_WAITALL)
1157 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1158
1159 ret = sock_recvmsg(sock, &kmsg->msg, flags);
1160 if (ret < min_ret) {
1161 if (ret == -EAGAIN && force_nonblock) {
1162 if (issue_flags & IO_URING_F_MULTISHOT)
1163 io_kbuf_recycle(req, issue_flags);
1164
1165 return IOU_RETRY;
1166 }
1167 if (ret > 0 && io_net_retry(sock, flags)) {
1168 sr->len -= ret;
1169 sr->buf += ret;
1170 sr->done_io += ret;
1171 req->flags |= REQ_F_BL_NO_RECYCLE;
1172 return -EAGAIN;
1173 }
1174 if (ret == -ERESTARTSYS)
1175 ret = -EINTR;
1176 req_set_fail(req);
1177 } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1178 out_free:
1179 req_set_fail(req);
1180 }
1181
1182 mshot_finished = ret <= 0;
1183 if (ret > 0)
1184 ret += sr->done_io;
1185 else if (sr->done_io)
1186 ret = sr->done_io;
1187 else
1188 io_kbuf_recycle(req, issue_flags);
1189
1190 if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
1191 goto retry_multishot;
1192
1193 return ret;
1194 }
1195
1196 int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1197 {
1198 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1199 unsigned ifq_idx;
1200
1201 if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
1202 return -EINVAL;
1203
1204 ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
1205 zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
1206 if (!zc->ifq)
1207 return -EINVAL;
1208
1209 zc->len = READ_ONCE(sqe->len);
1210 zc->flags = READ_ONCE(sqe->ioprio);
1211 zc->msg_flags = READ_ONCE(sqe->msg_flags);
1212 if (zc->msg_flags)
1213 return -EINVAL;
1214 if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
1215 return -EINVAL;
1216 /* multishot required */
1217 if (!(zc->flags & IORING_RECV_MULTISHOT))
1218 return -EINVAL;
1219 /* All data completions are posted as aux CQEs. */
1220 req->flags |= REQ_F_APOLL_MULTISHOT;
1221
1222 return 0;
1223 }
1224
1225 int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
1226 {
1227 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1228 struct socket *sock;
1229 unsigned int len;
1230 int ret;
1231
1232 if (!(req->flags & REQ_F_POLLED) &&
1233 (zc->flags & IORING_RECVSEND_POLL_FIRST))
1234 return -EAGAIN;
1235
1236 sock = sock_from_file(req->file);
1237 if (unlikely(!sock))
1238 return -ENOTSOCK;
1239
1240 len = zc->len;
1241 ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
1242 issue_flags, &zc->len);
1243 if (len && zc->len == 0) {
1244 io_req_set_res(req, 0, 0);
1245
1246 return IOU_COMPLETE;
1247 }
1248 if (unlikely(ret <= 0) && ret != -EAGAIN) {
1249 if (ret == -ERESTARTSYS)
1250 ret = -EINTR;
1251 if (ret == IOU_REQUEUE)
1252 return IOU_REQUEUE;
1253
1254 req_set_fail(req);
1255 io_req_set_res(req, ret, 0);
1256 return IOU_COMPLETE;
1257 }
1258 return IOU_RETRY;
1259 }
1260
1261 void io_send_zc_cleanup(struct io_kiocb *req)
1262 {
1263 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1264 struct io_async_msghdr *io = req->async_data;
1265
1266 if (req_has_async_data(req))
1267 io_netmsg_iovec_free(io);
1268 if (zc->notif) {
1269 io_notif_flush(zc->notif);
1270 zc->notif = NULL;
1271 }
1272 }
1273
1274 #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
1275 #define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
1276
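/*
 * Zero-copy sends complete in two steps: the request posts a CQE carrying
 * IORING_CQE_F_MORE (see io_send_zc()/io_sendmsg_zc()), and the attached
 * notification posts a second CQE with IORING_CQE_F_NOTIF once the kernel
 * is done with the user buffer.
 */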
1277 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1278 {
1279 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1280 struct io_ring_ctx *ctx = req->ctx;
1281 struct io_async_msghdr *iomsg;
1282 struct io_kiocb *notif;
1283 int ret;
1284
1285 zc->done_io = 0;
1286 zc->retry_flags = 0;
1287
1288 if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1289 return -EINVAL;
1290 /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1291 if (req->flags & REQ_F_CQE_SKIP)
1292 return -EINVAL;
1293
1294 notif = zc->notif = io_alloc_notif(ctx);
1295 if (!notif)
1296 return -ENOMEM;
1297 notif->cqe.user_data = req->cqe.user_data;
1298 notif->cqe.res = 0;
1299 notif->cqe.flags = IORING_CQE_F_NOTIF;
1300 req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;
1301
1302 zc->flags = READ_ONCE(sqe->ioprio);
1303 if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1304 if (zc->flags & ~IO_ZC_FLAGS_VALID)
1305 return -EINVAL;
1306 if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1307 struct io_notif_data *nd = io_notif_to_data(notif);
1308
1309 nd->zc_report = true;
1310 nd->zc_used = false;
1311 nd->zc_copied = false;
1312 }
1313 }
1314
1315 zc->len = READ_ONCE(sqe->len);
1316 zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
1317 req->buf_index = READ_ONCE(sqe->buf_index);
1318 if (zc->msg_flags & MSG_DONTWAIT)
1319 req->flags |= REQ_F_NOWAIT;
1320
1321 if (io_is_compat(req->ctx))
1322 zc->msg_flags |= MSG_CMSG_COMPAT;
1323
1324 iomsg = io_msg_alloc_async(req);
1325 if (unlikely(!iomsg))
1326 return -ENOMEM;
1327
1328 if (req->opcode == IORING_OP_SEND_ZC) {
1329 ret = io_send_setup(req, sqe);
1330 } else {
1331 if (unlikely(sqe->addr2 || sqe->file_index))
1332 return -EINVAL;
1333 ret = io_sendmsg_setup(req, sqe);
1334 }
1335 if (unlikely(ret))
1336 return ret;
1337
1338 if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
1339 iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1340 return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
1341 }
1342 iomsg->msg.sg_from_iter = io_sg_from_iter;
1343 return 0;
1344 }
1345
1346 static int io_sg_from_iter_iovec(struct sk_buff *skb,
1347 struct iov_iter *from, size_t length)
1348 {
1349 skb_zcopy_downgrade_managed(skb);
1350 return zerocopy_fill_skb_from_iter(skb, from, length);
1351 }
1352
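/*
 * Fixed-buffer sends arrive here with a bvec-backed iterator: pages are
 * attached to the skb as managed frags without taking per-page references.
 * If the skb already carries unmanaged frags, fall back to the generic
 * zerocopy_fill_skb_from_iter() path.
 */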
1353 static int io_sg_from_iter(struct sk_buff *skb,
1354 struct iov_iter *from, size_t length)
1355 {
1356 struct skb_shared_info *shinfo = skb_shinfo(skb);
1357 int frag = shinfo->nr_frags;
1358 int ret = 0;
1359 struct bvec_iter bi;
1360 ssize_t copied = 0;
1361 unsigned long truesize = 0;
1362
1363 if (!frag)
1364 shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1365 else if (unlikely(!skb_zcopy_managed(skb)))
1366 return zerocopy_fill_skb_from_iter(skb, from, length);
1367
1368 bi.bi_size = min(from->count, length);
1369 bi.bi_bvec_done = from->iov_offset;
1370 bi.bi_idx = 0;
1371
1372 while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1373 struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1374
1375 copied += v.bv_len;
1376 truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1377 __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1378 v.bv_offset, v.bv_len);
1379 bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1380 }
1381 if (bi.bi_size)
1382 ret = -EMSGSIZE;
1383
1384 shinfo->nr_frags = frag;
1385 from->bvec += bi.bi_idx;
1386 from->nr_segs -= bi.bi_idx;
1387 from->count -= copied;
1388 from->iov_offset = bi.bi_bvec_done;
1389
1390 skb->data_len += copied;
1391 skb->len += copied;
1392 skb->truesize += truesize;
1393 return ret;
1394 }
1395
1396 static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
1397 {
1398 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1399 struct io_async_msghdr *kmsg = req->async_data;
1400
1401 WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));
1402
1403 sr->notif->buf_index = req->buf_index;
1404 return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
1405 (u64)(uintptr_t)sr->buf, sr->len,
1406 ITER_SOURCE, issue_flags);
1407 }
1408
1409 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1410 {
1411 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1412 struct io_async_msghdr *kmsg = req->async_data;
1413 struct socket *sock;
1414 unsigned msg_flags;
1415 int ret, min_ret = 0;
1416
1417 sock = sock_from_file(req->file);
1418 if (unlikely(!sock))
1419 return -ENOTSOCK;
1420 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1421 return -EOPNOTSUPP;
1422
1423 if (!(req->flags & REQ_F_POLLED) &&
1424 (zc->flags & IORING_RECVSEND_POLL_FIRST))
1425 return -EAGAIN;
1426
1427 if (req->flags & REQ_F_IMPORT_BUFFER) {
1428 req->flags &= ~REQ_F_IMPORT_BUFFER;
1429 ret = io_send_zc_import(req, issue_flags);
1430 if (unlikely(ret))
1431 return ret;
1432 }
1433
1434 msg_flags = zc->msg_flags;
1435 if (issue_flags & IO_URING_F_NONBLOCK)
1436 msg_flags |= MSG_DONTWAIT;
1437 if (msg_flags & MSG_WAITALL)
1438 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1439 msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1440
1441 kmsg->msg.msg_flags = msg_flags;
1442 kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1443 ret = sock_sendmsg(sock, &kmsg->msg);
1444
1445 if (unlikely(ret < min_ret)) {
1446 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1447 return -EAGAIN;
1448
1449 if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
1450 zc->len -= ret;
1451 zc->buf += ret;
1452 zc->done_io += ret;
1453 req->flags |= REQ_F_BL_NO_RECYCLE;
1454 return -EAGAIN;
1455 }
1456 if (ret == -ERESTARTSYS)
1457 ret = -EINTR;
1458 req_set_fail(req);
1459 }
1460
1461 if (ret >= 0)
1462 ret += zc->done_io;
1463 else if (zc->done_io)
1464 ret = zc->done_io;
1465
1466 /*
1467 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1468 * flushing notif to io_send_zc_cleanup()
1469 */
1470 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1471 io_notif_flush(zc->notif);
1472 zc->notif = NULL;
1473 io_req_msg_cleanup(req, 0);
1474 }
1475 io_req_set_res(req, ret, IORING_CQE_F_MORE);
1476 return IOU_COMPLETE;
1477 }
1478
1479 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1480 {
1481 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1482 struct io_async_msghdr *kmsg = req->async_data;
1483 struct socket *sock;
1484 unsigned flags;
1485 int ret, min_ret = 0;
1486
1487 if (req->flags & REQ_F_IMPORT_BUFFER) {
1488 unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
1489 int ret;
1490
1491 ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
1492 &kmsg->vec, uvec_segs, issue_flags);
1493 if (unlikely(ret))
1494 return ret;
1495 req->flags &= ~REQ_F_IMPORT_BUFFER;
1496 }
1497
1498 sock = sock_from_file(req->file);
1499 if (unlikely(!sock))
1500 return -ENOTSOCK;
1501 if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1502 return -EOPNOTSUPP;
1503
1504 if (!(req->flags & REQ_F_POLLED) &&
1505 (sr->flags & IORING_RECVSEND_POLL_FIRST))
1506 return -EAGAIN;
1507
1508 flags = sr->msg_flags;
1509 if (issue_flags & IO_URING_F_NONBLOCK)
1510 flags |= MSG_DONTWAIT;
1511 if (flags & MSG_WAITALL)
1512 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1513
1514 kmsg->msg.msg_control_user = sr->msg_control;
1515 kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1516 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1517
1518 if (unlikely(ret < min_ret)) {
1519 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1520 return -EAGAIN;
1521
1522 if (ret > 0 && io_net_retry(sock, flags)) {
1523 sr->done_io += ret;
1524 req->flags |= REQ_F_BL_NO_RECYCLE;
1525 return -EAGAIN;
1526 }
1527 if (ret == -ERESTARTSYS)
1528 ret = -EINTR;
1529 req_set_fail(req);
1530 }
1531
1532 if (ret >= 0)
1533 ret += sr->done_io;
1534 else if (sr->done_io)
1535 ret = sr->done_io;
1536
1537 /*
1538 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1539 * flushing notif to io_send_zc_cleanup()
1540 */
1541 if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1542 io_notif_flush(sr->notif);
1543 sr->notif = NULL;
1544 io_req_msg_cleanup(req, 0);
1545 }
1546 io_req_set_res(req, ret, IORING_CQE_F_MORE);
1547 return IOU_COMPLETE;
1548 }
1549
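/*
 * Failure path for send/recv: report any partial progress, and for a
 * zero-copy send that still owns its notification keep IORING_CQE_F_MORE
 * set so userspace still waits for the trailing notification CQE.
 */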
1550 void io_sendrecv_fail(struct io_kiocb *req)
1551 {
1552 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1553
1554 if (sr->done_io)
1555 req->cqe.res = sr->done_io;
1556
1557 if ((req->flags & REQ_F_NEED_CLEANUP) &&
1558 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1559 req->cqe.flags |= IORING_CQE_F_MORE;
1560 }
1561
1562 #define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
1563 IORING_ACCEPT_POLL_FIRST)
1564
1565 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1566 {
1567 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1568
1569 if (sqe->len || sqe->buf_index)
1570 return -EINVAL;
1571
1572 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1573 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1574 accept->flags = READ_ONCE(sqe->accept_flags);
1575 accept->nofile = rlimit(RLIMIT_NOFILE);
1576 accept->iou_flags = READ_ONCE(sqe->ioprio);
1577 if (accept->iou_flags & ~ACCEPT_FLAGS)
1578 return -EINVAL;
1579
1580 accept->file_slot = READ_ONCE(sqe->file_index);
1581 if (accept->file_slot) {
1582 if (accept->flags & SOCK_CLOEXEC)
1583 return -EINVAL;
1584 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
1585 accept->file_slot != IORING_FILE_INDEX_ALLOC)
1586 return -EINVAL;
1587 }
1588 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1589 return -EINVAL;
1590 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1591 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1592 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
1593 req->flags |= REQ_F_APOLL_MULTISHOT;
1594 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
1595 req->flags |= REQ_F_NOWAIT;
1596 return 0;
1597 }
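/*
 * A minimal userspace sketch (not upstream code) of a multishot accept that
 * passes the validation above, using normal (non-fixed) file descriptors:
 *
 *	sqe->opcode = IORING_OP_ACCEPT;
 *	sqe->fd = listen_fd;
 *	sqe->ioprio = IORING_ACCEPT_MULTISHOT;	// becomes accept->iou_flags
 *	sqe->addr = 0;				// no peer address requested
 *	sqe->addr2 = 0;
 */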
1598
1599 int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1600 {
1601 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1602 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1603 bool fixed = !!accept->file_slot;
1604 struct proto_accept_arg arg = {
1605 .flags = force_nonblock ? O_NONBLOCK : 0,
1606 };
1607 struct file *file;
1608 unsigned cflags;
1609 int ret, fd;
1610
1611 if (!(req->flags & REQ_F_POLLED) &&
1612 accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
1613 return -EAGAIN;
1614
1615 retry:
1616 if (!fixed) {
1617 fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1618 if (unlikely(fd < 0))
1619 return fd;
1620 }
1621 arg.err = 0;
1622 arg.is_empty = -1;
1623 file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
1624 accept->flags);
1625 if (IS_ERR(file)) {
1626 if (!fixed)
1627 put_unused_fd(fd);
1628 ret = PTR_ERR(file);
1629 if (ret == -EAGAIN && force_nonblock &&
1630 !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
1631 return IOU_RETRY;
1632
1633 if (ret == -ERESTARTSYS)
1634 ret = -EINTR;
1635 } else if (!fixed) {
1636 fd_install(fd, file);
1637 ret = fd;
1638 } else {
1639 ret = io_fixed_fd_install(req, issue_flags, file,
1640 accept->file_slot);
1641 }
1642
1643 cflags = 0;
1644 if (!arg.is_empty)
1645 cflags |= IORING_CQE_F_SOCK_NONEMPTY;
1646
1647 if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
1648 io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
1649 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
1650 goto retry;
1651 return IOU_RETRY;
1652 }
1653
1654 io_req_set_res(req, ret, cflags);
1655 if (ret < 0)
1656 req_set_fail(req);
1657 return IOU_COMPLETE;
1658 }
1659
1660 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1661 {
1662 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1663
1664 if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1665 return -EINVAL;
1666
1667 sock->domain = READ_ONCE(sqe->fd);
1668 sock->type = READ_ONCE(sqe->off);
1669 sock->protocol = READ_ONCE(sqe->len);
1670 sock->file_slot = READ_ONCE(sqe->file_index);
1671 sock->nofile = rlimit(RLIMIT_NOFILE);
1672
1673 sock->flags = sock->type & ~SOCK_TYPE_MASK;
1674 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1675 return -EINVAL;
1676 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1677 return -EINVAL;
1678 return 0;
1679 }
1680
1681 int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1682 {
1683 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1684 bool fixed = !!sock->file_slot;
1685 struct file *file;
1686 int ret, fd;
1687
1688 if (!fixed) {
1689 fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1690 if (unlikely(fd < 0))
1691 return fd;
1692 }
1693 file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1694 if (IS_ERR(file)) {
1695 if (!fixed)
1696 put_unused_fd(fd);
1697 ret = PTR_ERR(file);
1698 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1699 return -EAGAIN;
1700 if (ret == -ERESTARTSYS)
1701 ret = -EINTR;
1702 req_set_fail(req);
1703 } else if (!fixed) {
1704 fd_install(fd, file);
1705 ret = fd;
1706 } else {
1707 ret = io_fixed_fd_install(req, issue_flags, file,
1708 sock->file_slot);
1709 }
1710 io_req_set_res(req, ret, 0);
1711 return IOU_COMPLETE;
1712 }
1713
1714 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1715 {
1716 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1717 struct io_async_msghdr *io;
1718
1719 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1720 return -EINVAL;
1721
1722 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1723 conn->addr_len = READ_ONCE(sqe->addr2);
1724 conn->in_progress = conn->seen_econnaborted = false;
1725
1726 io = io_msg_alloc_async(req);
1727 if (unlikely(!io))
1728 return -ENOMEM;
1729
1730 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
1731 }
1732
1733 int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1734 {
1735 struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1736 struct io_async_msghdr *io = req->async_data;
1737 unsigned file_flags;
1738 int ret;
1739 bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1740
1741 if (unlikely(req->flags & REQ_F_FAIL)) {
1742 ret = -ECONNRESET;
1743 goto out;
1744 }
1745
1746 file_flags = force_nonblock ? O_NONBLOCK : 0;
1747
1748 ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
1749 file_flags);
1750 if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1751 && force_nonblock) {
1752 if (ret == -EINPROGRESS) {
1753 connect->in_progress = true;
1754 } else if (ret == -ECONNABORTED) {
1755 if (connect->seen_econnaborted)
1756 goto out;
1757 connect->seen_econnaborted = true;
1758 }
1759 return -EAGAIN;
1760 }
1761 if (connect->in_progress) {
1762 /*
1763 * At least bluetooth will return -EBADFD on a re-connect
1764 * attempt, and it's (supposedly) also valid to get -EISCONN
1765 * which means the previous result is good. For both of these,
1766 * grab the sock_error() and use that for the completion.
1767 */
1768 if (ret == -EBADFD || ret == -EISCONN)
1769 ret = sock_error(sock_from_file(req->file)->sk);
1770 }
1771 if (ret == -ERESTARTSYS)
1772 ret = -EINTR;
1773 out:
1774 if (ret < 0)
1775 req_set_fail(req);
1776 io_req_msg_cleanup(req, issue_flags);
1777 io_req_set_res(req, ret, 0);
1778 return IOU_COMPLETE;
1779 }
1780
1781 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1782 {
1783 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1784 struct sockaddr __user *uaddr;
1785 struct io_async_msghdr *io;
1786
1787 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1788 return -EINVAL;
1789
1790 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1791 bind->addr_len = READ_ONCE(sqe->addr2);
1792
1793 io = io_msg_alloc_async(req);
1794 if (unlikely(!io))
1795 return -ENOMEM;
1796 return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
1797 }
1798
1799 int io_bind(struct io_kiocb *req, unsigned int issue_flags)
1800 {
1801 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1802 struct io_async_msghdr *io = req->async_data;
1803 struct socket *sock;
1804 int ret;
1805
1806 sock = sock_from_file(req->file);
1807 if (unlikely(!sock))
1808 return -ENOTSOCK;
1809
1810 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
1811 if (ret < 0)
1812 req_set_fail(req);
1813 io_req_set_res(req, ret, 0);
1814 return 0;
1815 }
1816
1817 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1818 {
1819 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1820
1821 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
1822 return -EINVAL;
1823
1824 listen->backlog = READ_ONCE(sqe->len);
1825 return 0;
1826 }
1827
1828 int io_listen(struct io_kiocb *req, unsigned int issue_flags)
1829 {
1830 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1831 struct socket *sock;
1832 int ret;
1833
1834 sock = sock_from_file(req->file);
1835 if (unlikely(!sock))
1836 return -ENOTSOCK;
1837
1838 ret = __sys_listen_socket(sock, listen->backlog);
1839 if (ret < 0)
1840 req_set_fail(req);
1841 io_req_set_res(req, ret, 0);
1842 return 0;
1843 }
1844
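/*
 * Free routine for ctx->netmsg_cache entries: drop any cached iovec before
 * freeing the async msghdr itself.
 */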
1845 void io_netmsg_cache_free(const void *entry)
1846 {
1847 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
1848
1849 io_vec_free(&kmsg->vec);
1850 kfree(kmsg);
1851 }
1852