1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/slab.h>
6 #include <linux/net.h>
7 #include <linux/un.h>
8 #include <linux/compat.h>
9 #include <net/compat.h>
10 #include <linux/io_uring.h>
11
12 #include <uapi/linux/io_uring.h>
13
14 #include "filetable.h"
15 #include "io_uring.h"
16 #include "kbuf.h"
17 #include "alloc_cache.h"
18 #include "net.h"
19 #include "notif.h"
20 #include "rsrc.h"
21 #include "zcrx.h"
22
/* Per-request command data for the shutdown opcode. */
struct io_shutdown {
	struct file *file;
	/* "how" argument for the shutdown call; read from sqe->len at prep */
	int how;
};
27
28 struct io_accept {
29 struct file *file;
30 struct sockaddr __user *addr;
31 int __user *addr_len;
32 int flags;
33 int iou_flags;
34 u32 file_slot;
35 unsigned long nofile;
36 };
37
38 struct io_socket {
39 struct file *file;
40 int domain;
41 int type;
42 int protocol;
43 int flags;
44 u32 file_slot;
45 unsigned long nofile;
46 };
47
48 struct io_connect {
49 struct file *file;
50 struct sockaddr __user *addr;
51 int addr_len;
52 bool in_progress;
53 bool seen_econnaborted;
54 };
55
/* Per-request command data for the bind opcode. */
struct io_bind {
	struct file *file;
	int addr_len;
};
60
/* Per-request command data for the listen opcode. */
struct io_listen {
	struct file *file;
	int backlog;
};
65
66 struct io_sr_msg {
67 struct file *file;
68 union {
69 struct compat_msghdr __user *umsg_compat;
70 struct user_msghdr __user *umsg;
71 void __user *buf;
72 };
73 int len;
74 unsigned done_io;
75 unsigned msg_flags;
76 unsigned nr_multishot_loops;
77 u16 flags;
78 /* initialised and used only by !msg send variants */
79 u16 buf_group;
80 /* per-invocation mshot limit */
81 unsigned mshot_len;
82 /* overall mshot byte limit */
83 unsigned mshot_total_len;
84 void __user *msg_control;
85 /* used only for send zerocopy */
86 struct io_kiocb *notif;
87 };
88
/*
 * sqe->ioprio only carries the UAPI flags, which live in the lower 8 bits
 * of io_sr_msg->flags. The upper 8 bits are reserved for the internal
 * state bits below.
 */
enum sr_retry_flags {
	/* a bundle receive is being retried to append more data */
	IORING_RECV_RETRY	= (1U << 15),
	IORING_RECV_PARTIAL_MAP	= (1U << 14),
	/* the per-invocation mshot length was reached */
	IORING_RECV_MSHOT_CAP	= (1U << 13),
	/* an overall mshot byte limit is in effect */
	IORING_RECV_MSHOT_LIM	= (1U << 12),
	/* the overall mshot byte limit has been used up */
	IORING_RECV_MSHOT_DONE	= (1U << 11),

	/* bits dropped whenever a multishot retry is prepared */
	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
	/* any of these set means a bundle must not be retried */
	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY_CLEAR |
				  IORING_RECV_MSHOT_CAP |
				  IORING_RECV_MSHOT_DONE,
};
104
/*
 * Upper bound on how many receives are attempted back-to-back while more
 * data is pending. Past this limit the request is put at the back of the
 * queue and retried from there, which keeps flooding clients from starving
 * the others.
 */
#define MULTISHOT_MAX_RETRY	32
111
112 struct io_recvzc {
113 struct file *file;
114 u16 flags;
115 u32 len;
116 struct io_zcrx_ifq *ifq;
117 };
118
119 static int io_sg_from_iter_iovec(struct sk_buff *skb,
120 struct iov_iter *from, size_t length);
121 static int io_sg_from_iter(struct sk_buff *skb,
122 struct iov_iter *from, size_t length);
123
io_shutdown_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)124 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
125 {
126 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
127
128 if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
129 sqe->buf_index || sqe->splice_fd_in))
130 return -EINVAL;
131
132 shutdown->how = READ_ONCE(sqe->len);
133 req->flags |= REQ_F_FORCE_ASYNC;
134 return 0;
135 }
136
io_shutdown(struct io_kiocb * req,unsigned int issue_flags)137 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
138 {
139 struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
140 struct socket *sock;
141 int ret;
142
143 WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
144
145 sock = sock_from_file(req->file);
146 if (unlikely(!sock))
147 return -ENOTSOCK;
148
149 ret = __sys_shutdown_sock(sock, shutdown->how);
150 io_req_set_res(req, ret, 0);
151 return IOU_COMPLETE;
152 }
153
io_net_retry(struct socket * sock,int flags)154 static bool io_net_retry(struct socket *sock, int flags)
155 {
156 if (!(flags & MSG_WAITALL))
157 return false;
158 return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
159 }
160
io_netmsg_iovec_free(struct io_async_msghdr * kmsg)161 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
162 {
163 if (kmsg->vec.iovec)
164 io_vec_free(&kmsg->vec);
165 }
166
io_netmsg_recycle(struct io_kiocb * req,unsigned int issue_flags)167 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
168 {
169 struct io_async_msghdr *hdr = req->async_data;
170
171 /* can't recycle, ensure we free the iovec if we have one */
172 if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
173 io_netmsg_iovec_free(hdr);
174 return;
175 }
176
177 /* Let normal cleanup path reap it if we fail adding to the cache */
178 io_alloc_cache_vec_kasan(&hdr->vec);
179 if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
180 io_vec_free(&hdr->vec);
181
182 if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
183 io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
184 }
185
io_msg_alloc_async(struct io_kiocb * req)186 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
187 {
188 struct io_ring_ctx *ctx = req->ctx;
189 struct io_async_msghdr *hdr;
190
191 hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
192 if (!hdr)
193 return NULL;
194
195 /* If the async data was cached, we might have an iov cached inside. */
196 if (hdr->vec.iovec)
197 req->flags |= REQ_F_NEED_CLEANUP;
198 return hdr;
199 }
200
io_mshot_prep_retry(struct io_kiocb * req,struct io_async_msghdr * kmsg)201 static inline void io_mshot_prep_retry(struct io_kiocb *req,
202 struct io_async_msghdr *kmsg)
203 {
204 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
205
206 req->flags &= ~REQ_F_BL_EMPTY;
207 sr->done_io = 0;
208 sr->flags &= ~IORING_RECV_RETRY_CLEAR;
209 sr->len = sr->mshot_len;
210 }
211
/*
 * Import a user iovec array into @iomsg's iterator, using the already
 * attached iovec as scratch space when there is one and the single inline
 * fast_iov otherwise.
 *
 * Returns 0 on success or the negative error from __import_iovec().
 */
static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			     const struct iovec __user *uiov, unsigned uvec_seg,
			     int ddir)
{
	struct iovec *iov = &iomsg->fast_iov;
	int nr_segs = 1;
	int ret;

	if (iomsg->vec.iovec) {
		iov = iomsg->vec.iovec;
		nr_segs = iomsg->vec.nr;
	}

	ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));
	if (unlikely(ret < 0))
		return ret;
	if (!iov)
		return 0;

	/* a non-NULL iov is stored in iomsg->vec and must be cleaned up later */
	req->flags |= REQ_F_NEED_CLEANUP;
	io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
	return 0;
}
238
/*
 * Compat variant of the msghdr copy: fetch the compat header from
 * userspace into @msg and translate it into @iomsg->msg. For provided
 * buffer requests, at most one iovec is allowed and its length is stored
 * in sr->len. @ddir is not used here.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -EINVAL for more
 * than one iovec with buffer select, or the __get_compat_msghdr() error.
 */
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir,
				  struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct compat_iovec first;
	int ret;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT))
		return 0;

	if (msg->msg_iovlen > 1)
		return -EINVAL;
	if (msg->msg_iovlen == 0) {
		sr->len = 0;
		return 0;
	}

	/* exactly one iovec: only its length is of interest */
	uiov = compat_ptr(msg->msg_iov);
	if (copy_from_user(&first, uiov, sizeof(first)))
		return -EFAULT;
	sr->len = first.iov_len;
	return 0;
}
271
/*
 * Copy the msghdr fields field-by-field from userspace inside a single
 * user access section. msg_flags is not copied.
 *
 * Returns 0 on success or -EFAULT if the access could not be opened or
 * any field read faulted.
 */
static int io_copy_msghdr_from_user(struct user_msghdr *msg,
				    struct user_msghdr __user *umsg)
{
	if (!user_access_begin(umsg, sizeof(*umsg)))
		return -EFAULT;

	unsafe_get_user(msg->msg_name, &umsg->msg_name, efault);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, efault);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, efault);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, efault);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, efault);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, efault);

	user_access_end();
	return 0;

efault:
	/* the access section has to be closed on the fault path as well */
	user_access_end();
	return -EFAULT;
}
289
/*
 * Read the user msghdr for @req into @msg and set up @iomsg->msg from it.
 * Compat rings go through io_compat_msg_copy_hdr() and get @msg filled in
 * from the compat header. For provided buffer requests, at most one iovec
 * is allowed and its length ends up in sr->len. @ddir is only passed on to
 * the compat helper.
 *
 * Returns 0 on success or a negative error (-EFAULT, -EINVAL, or whatever
 * the header copy helpers return).
 */
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir,
			   struct sockaddr __user **save_addr)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct iovec first;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (io_is_compat(req->ctx)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
		if (ret)
			return ret;

		/* hand the caller a native view of the compat header */
		memset(msg, 0, sizeof(*msg));
		msg->msg_namelen = cmsg.msg_namelen;
		msg->msg_controllen = cmsg.msg_controllen;
		msg->msg_iov = compat_ptr(cmsg.msg_iov);
		msg->msg_iovlen = cmsg.msg_iovlen;
		return 0;
	}

	ret = io_copy_msghdr_from_user(msg, sr->umsg);
	if (unlikely(ret))
		return ret;

	/* msg_flags is not read from userspace, start from zero */
	msg->msg_flags = 0;

	ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
	if (ret)
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT))
		return 0;

	if (msg->msg_iovlen > 1)
		return -EINVAL;
	if (msg->msg_iovlen == 0) {
		sr->len = 0;
		return 0;
	}

	/* exactly one iovec: only its length is of interest */
	if (copy_from_user(&first, msg->msg_iov, sizeof(first)))
		return -EFAULT;
	sr->len = first.iov_len;
	return 0;
}
342
io_sendmsg_recvmsg_cleanup(struct io_kiocb * req)343 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
344 {
345 struct io_async_msghdr *io = req->async_data;
346
347 io_netmsg_iovec_free(io);
348 }
349
io_send_setup(struct io_kiocb * req,const struct io_uring_sqe * sqe)350 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
351 {
352 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
353 struct io_async_msghdr *kmsg = req->async_data;
354 void __user *addr;
355 u16 addr_len;
356 int ret;
357
358 sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
359
360 if (READ_ONCE(sqe->__pad3[0]))
361 return -EINVAL;
362
363 kmsg->msg.msg_name = NULL;
364 kmsg->msg.msg_namelen = 0;
365 kmsg->msg.msg_control = NULL;
366 kmsg->msg.msg_controllen = 0;
367 kmsg->msg.msg_ubuf = NULL;
368
369 addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
370 addr_len = READ_ONCE(sqe->addr_len);
371 if (addr) {
372 ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
373 if (unlikely(ret < 0))
374 return ret;
375 kmsg->msg.msg_name = &kmsg->addr;
376 kmsg->msg.msg_namelen = addr_len;
377 }
378 if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
379 if (!(sr->flags & IORING_SEND_VECTORIZED)) {
380 req->flags |= REQ_F_IMPORT_BUFFER;
381 return 0;
382 }
383
384 kmsg->msg.msg_iter.nr_segs = sr->len;
385 return io_prep_reg_iovec(req, &kmsg->vec, sr->buf, sr->len);
386 }
387 if (req->flags & REQ_F_BUFFER_SELECT)
388 return 0;
389
390 if (sr->flags & IORING_SEND_VECTORIZED)
391 return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
392
393 return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
394 }
395
io_sendmsg_setup(struct io_kiocb * req,const struct io_uring_sqe * sqe)396 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
397 {
398 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
399 struct io_async_msghdr *kmsg = req->async_data;
400 struct user_msghdr msg;
401 int ret;
402
403 sr->flags |= IORING_SEND_VECTORIZED;
404 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
405 ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
406 if (unlikely(ret))
407 return ret;
408 /* save msg_control as sys_sendmsg() overwrites it */
409 sr->msg_control = kmsg->msg.msg_control_user;
410
411 if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
412 kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
413 return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
414 msg.msg_iovlen);
415 }
416 if (req->flags & REQ_F_BUFFER_SELECT)
417 return 0;
418 return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
419 }
420
421 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)
422
io_sendmsg_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)423 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
424 {
425 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
426
427 sr->done_io = 0;
428 sr->len = READ_ONCE(sqe->len);
429 if (unlikely(sr->len < 0))
430 return -EINVAL;
431 sr->flags = READ_ONCE(sqe->ioprio);
432 if (sr->flags & ~SENDMSG_FLAGS)
433 return -EINVAL;
434 sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
435 if (sr->msg_flags & MSG_DONTWAIT)
436 req->flags |= REQ_F_NOWAIT;
437 if (req->flags & REQ_F_BUFFER_SELECT)
438 sr->buf_group = req->buf_index;
439 if (sr->flags & IORING_RECVSEND_BUNDLE) {
440 if (req->opcode == IORING_OP_SENDMSG)
441 return -EINVAL;
442 sr->msg_flags |= MSG_WAITALL;
443 req->flags |= REQ_F_MULTISHOT;
444 }
445
446 if (io_is_compat(req->ctx))
447 sr->msg_flags |= MSG_CMSG_COMPAT;
448
449 if (unlikely(!io_msg_alloc_async(req)))
450 return -ENOMEM;
451 if (req->opcode != IORING_OP_SENDMSG)
452 return io_send_setup(req, sqe);
453 if (unlikely(sqe->addr2 || sqe->file_index))
454 return -EINVAL;
455 return io_sendmsg_setup(req, sqe);
456 }
457
/* Completion-time cleanup for send/recv: recycle the async msghdr. */
static void io_req_msg_cleanup(struct io_kiocb *req, unsigned int issue_flags)
{
	io_netmsg_recycle(req, issue_flags);
}
463
464 /*
465 * For bundle completions, we need to figure out how many segments we consumed.
466 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
467 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
468 * the segments, then it's a trivial questiont o answer. If we have residual
469 * data in the iter, then loop the segments to figure out how much we
470 * transferred.
471 */
io_bundle_nbufs(struct io_async_msghdr * kmsg,int ret)472 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
473 {
474 struct iovec *iov;
475 int nbufs;
476
477 /* no data is always zero segments, and a ubuf is always 1 segment */
478 if (ret <= 0)
479 return 0;
480 if (iter_is_ubuf(&kmsg->msg.msg_iter))
481 return 1;
482
483 iov = kmsg->vec.iovec;
484 if (!iov)
485 iov = &kmsg->fast_iov;
486
487 /* if all data was transferred, it's basic pointer math */
488 if (!iov_iter_count(&kmsg->msg.msg_iter))
489 return iter_iov(&kmsg->msg.msg_iter) - iov;
490
491 /* short transfer, count segments */
492 nbufs = 0;
493 do {
494 int this_len = min_t(int, iov[nbufs].iov_len, ret);
495
496 nbufs++;
497 ret -= this_len;
498 } while (ret);
499
500 return nbufs;
501 }
502
io_net_kbuf_recyle(struct io_kiocb * req,struct io_buffer_list * bl,struct io_async_msghdr * kmsg,int len)503 static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
504 struct io_async_msghdr *kmsg, int len)
505 {
506 req->flags |= REQ_F_BL_NO_RECYCLE;
507 if (req->flags & REQ_F_BUFFERS_COMMIT)
508 io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
509 return IOU_RETRY;
510 }
511
io_send_finish(struct io_kiocb * req,struct io_async_msghdr * kmsg,struct io_br_sel * sel)512 static inline bool io_send_finish(struct io_kiocb *req,
513 struct io_async_msghdr *kmsg,
514 struct io_br_sel *sel)
515 {
516 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
517 bool bundle_finished = sel->val <= 0;
518 unsigned int cflags;
519
520 if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
521 cflags = io_put_kbuf(req, sel->val, sel->buf_list);
522 goto finish;
523 }
524
525 cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));
526
527 /*
528 * Don't start new bundles if the buffer list is empty, or if the
529 * current operation needed to go through polling to complete.
530 */
531 if (bundle_finished || req->flags & (REQ_F_BL_EMPTY | REQ_F_POLLED))
532 goto finish;
533
534 /*
535 * Fill CQE for this receive and see if we should keep trying to
536 * receive from this socket.
537 */
538 if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
539 io_mshot_prep_retry(req, kmsg);
540 return false;
541 }
542
543 /* Otherwise stop bundle and use the current result. */
544 finish:
545 io_req_set_res(req, sel->val, cflags);
546 sel->val = IOU_COMPLETE;
547 return true;
548 }
549
io_sendmsg(struct io_kiocb * req,unsigned int issue_flags)550 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
551 {
552 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
553 struct io_async_msghdr *kmsg = req->async_data;
554 struct socket *sock;
555 unsigned flags;
556 int min_ret = 0;
557 int ret;
558
559 sock = sock_from_file(req->file);
560 if (unlikely(!sock))
561 return -ENOTSOCK;
562
563 if (!(req->flags & REQ_F_POLLED) &&
564 (sr->flags & IORING_RECVSEND_POLL_FIRST))
565 return -EAGAIN;
566
567 flags = sr->msg_flags;
568 if (issue_flags & IO_URING_F_NONBLOCK)
569 flags |= MSG_DONTWAIT;
570 if (flags & MSG_WAITALL)
571 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
572
573 kmsg->msg.msg_control_user = sr->msg_control;
574
575 ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
576
577 if (ret < min_ret) {
578 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
579 return -EAGAIN;
580 if (ret > 0 && io_net_retry(sock, flags)) {
581 kmsg->msg.msg_controllen = 0;
582 kmsg->msg.msg_control = NULL;
583 sr->done_io += ret;
584 return -EAGAIN;
585 }
586 if (ret == -ERESTARTSYS)
587 ret = -EINTR;
588 req_set_fail(req);
589 }
590 io_req_msg_cleanup(req, issue_flags);
591 if (ret >= 0)
592 ret += sr->done_io;
593 else if (sr->done_io)
594 ret = sr->done_io;
595 io_req_set_res(req, ret, 0);
596 return IOU_COMPLETE;
597 }
598
io_send_select_buffer(struct io_kiocb * req,unsigned int issue_flags,struct io_br_sel * sel,struct io_async_msghdr * kmsg)599 static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
600 struct io_br_sel *sel, struct io_async_msghdr *kmsg)
601 {
602 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
603 struct buf_sel_arg arg = {
604 .iovs = &kmsg->fast_iov,
605 .max_len = min_not_zero(sr->len, INT_MAX),
606 .nr_iovs = 1,
607 .buf_group = sr->buf_group,
608 };
609 int ret;
610
611 if (kmsg->vec.iovec) {
612 arg.nr_iovs = kmsg->vec.nr;
613 arg.iovs = kmsg->vec.iovec;
614 arg.mode = KBUF_MODE_FREE;
615 }
616
617 if (!(sr->flags & IORING_RECVSEND_BUNDLE))
618 arg.nr_iovs = 1;
619 else
620 arg.mode |= KBUF_MODE_EXPAND;
621
622 ret = io_buffers_select(req, &arg, sel, issue_flags);
623 if (unlikely(ret < 0))
624 return ret;
625
626 if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
627 kmsg->vec.nr = ret;
628 kmsg->vec.iovec = arg.iovs;
629 req->flags |= REQ_F_NEED_CLEANUP;
630 }
631 sr->len = arg.out_len;
632
633 if (ret == 1) {
634 sr->buf = arg.iovs[0].iov_base;
635 ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
636 &kmsg->msg.msg_iter);
637 if (unlikely(ret))
638 return ret;
639 } else {
640 iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
641 arg.iovs, ret, arg.out_len);
642 }
643
644 return 0;
645 }
646
io_send(struct io_kiocb * req,unsigned int issue_flags)647 int io_send(struct io_kiocb *req, unsigned int issue_flags)
648 {
649 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
650 struct io_async_msghdr *kmsg = req->async_data;
651 struct io_br_sel sel = { };
652 struct socket *sock;
653 unsigned flags;
654 int min_ret = 0;
655 int ret;
656
657 sock = sock_from_file(req->file);
658 if (unlikely(!sock))
659 return -ENOTSOCK;
660
661 if (!(req->flags & REQ_F_POLLED) &&
662 (sr->flags & IORING_RECVSEND_POLL_FIRST))
663 return -EAGAIN;
664
665 flags = sr->msg_flags;
666 if (issue_flags & IO_URING_F_NONBLOCK)
667 flags |= MSG_DONTWAIT;
668
669 retry_bundle:
670 sel.buf_list = NULL;
671 if (io_do_buffer_select(req)) {
672 ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
673 if (ret)
674 return ret;
675 }
676
677 /*
678 * If MSG_WAITALL is set, or this is a bundle send, then we need
679 * the full amount. If just bundle is set, if we do a short send
680 * then we complete the bundle sequence rather than continue on.
681 */
682 if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
683 min_ret = iov_iter_count(&kmsg->msg.msg_iter);
684
685 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
686 kmsg->msg.msg_flags = flags;
687 ret = sock_sendmsg(sock, &kmsg->msg);
688 if (ret < min_ret) {
689 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
690 return -EAGAIN;
691
692 if (ret > 0 && io_net_retry(sock, flags)) {
693 sr->len -= ret;
694 sr->buf += ret;
695 sr->done_io += ret;
696 return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
697 }
698 if (ret == -ERESTARTSYS)
699 ret = -EINTR;
700 req_set_fail(req);
701 }
702 if (ret >= 0)
703 ret += sr->done_io;
704 else if (sr->done_io)
705 ret = sr->done_io;
706
707 sel.val = ret;
708 if (!io_send_finish(req, kmsg, &sel))
709 goto retry_bundle;
710
711 io_req_msg_cleanup(req, issue_flags);
712 return sel.val;
713 }
714
/*
 * For multishot recvmsg with provided buffers, make sure the per-buffer
 * header (io_uring_recvmsg_out + name + control) fits in an int and
 * remember the name and control lengths. Other requests pass through.
 *
 * Returns 0 on success or -EOVERFLOW for a negative name length or an
 * overflowing header size.
 */
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	int hdr;

	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) !=
	    (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT))
		return 0;

	if (unlikely(namelen < 0))
		return -EOVERFLOW;
	if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
			       namelen, &hdr))
		return -EOVERFLOW;
	if (check_add_overflow(hdr, controllen, &hdr))
		return -EOVERFLOW;

	iomsg->namelen = namelen;
	iomsg->controllen = controllen;
	return 0;
}
738
io_recvmsg_copy_hdr(struct io_kiocb * req,struct io_async_msghdr * iomsg)739 static int io_recvmsg_copy_hdr(struct io_kiocb *req,
740 struct io_async_msghdr *iomsg)
741 {
742 struct user_msghdr msg;
743 int ret;
744
745 ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
746 if (unlikely(ret))
747 return ret;
748
749 if (!(req->flags & REQ_F_BUFFER_SELECT)) {
750 ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
751 ITER_DEST);
752 if (unlikely(ret))
753 return ret;
754 }
755 return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
756 msg.msg_controllen);
757 }
758
io_recvmsg_prep_setup(struct io_kiocb * req)759 static int io_recvmsg_prep_setup(struct io_kiocb *req)
760 {
761 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
762 struct io_async_msghdr *kmsg;
763
764 kmsg = io_msg_alloc_async(req);
765 if (unlikely(!kmsg))
766 return -ENOMEM;
767
768 if (req->opcode == IORING_OP_RECV) {
769 kmsg->msg.msg_name = NULL;
770 kmsg->msg.msg_namelen = 0;
771 kmsg->msg.msg_inq = 0;
772 kmsg->msg.msg_control = NULL;
773 kmsg->msg.msg_get_inq = 1;
774 kmsg->msg.msg_controllen = 0;
775 kmsg->msg.msg_iocb = NULL;
776 kmsg->msg.msg_ubuf = NULL;
777
778 if (req->flags & REQ_F_BUFFER_SELECT)
779 return 0;
780 return import_ubuf(ITER_DEST, sr->buf, sr->len,
781 &kmsg->msg.msg_iter);
782 }
783
784 return io_recvmsg_copy_hdr(req, kmsg);
785 }
786
787 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
788 IORING_RECVSEND_BUNDLE)
789
io_recvmsg_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)790 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
791 {
792 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
793
794 sr->done_io = 0;
795
796 if (unlikely(sqe->addr2))
797 return -EINVAL;
798
799 sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
800 sr->len = READ_ONCE(sqe->len);
801 if (unlikely(sr->len < 0))
802 return -EINVAL;
803 sr->flags = READ_ONCE(sqe->ioprio);
804 if (sr->flags & ~RECVMSG_FLAGS)
805 return -EINVAL;
806 sr->msg_flags = READ_ONCE(sqe->msg_flags);
807 if (sr->msg_flags & MSG_DONTWAIT)
808 req->flags |= REQ_F_NOWAIT;
809 if (sr->msg_flags & MSG_ERRQUEUE)
810 req->flags |= REQ_F_CLEAR_POLLIN;
811 if (req->flags & REQ_F_BUFFER_SELECT)
812 sr->buf_group = req->buf_index;
813 sr->mshot_total_len = sr->mshot_len = 0;
814 if (sr->flags & IORING_RECV_MULTISHOT) {
815 if (!(req->flags & REQ_F_BUFFER_SELECT))
816 return -EINVAL;
817 if (sr->msg_flags & MSG_WAITALL)
818 return -EINVAL;
819 if (req->opcode == IORING_OP_RECV) {
820 sr->mshot_len = sr->len;
821 sr->mshot_total_len = READ_ONCE(sqe->optlen);
822 if (sr->mshot_total_len)
823 sr->flags |= IORING_RECV_MSHOT_LIM;
824 } else if (sqe->optlen) {
825 return -EINVAL;
826 }
827 req->flags |= REQ_F_APOLL_MULTISHOT;
828 } else if (sqe->optlen) {
829 return -EINVAL;
830 }
831
832 if (sr->flags & IORING_RECVSEND_BUNDLE) {
833 if (req->opcode == IORING_OP_RECVMSG)
834 return -EINVAL;
835 }
836
837 if (io_is_compat(req->ctx))
838 sr->msg_flags |= MSG_CMSG_COMPAT;
839
840 sr->nr_multishot_loops = 0;
841 return io_recvmsg_prep_setup(req);
842 }
843
844 /* bits to clear in old and inherit in new cflags on bundle retry */
845 #define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE|\
846 IORING_CQE_F_BUF_MORE)
847
848 /*
849 * Finishes io_recv and io_recvmsg.
850 *
851 * Returns true if it is actually finished, or false if it should run
852 * again (for multishot).
853 */
io_recv_finish(struct io_kiocb * req,struct io_async_msghdr * kmsg,struct io_br_sel * sel,bool mshot_finished,unsigned issue_flags)854 static inline bool io_recv_finish(struct io_kiocb *req,
855 struct io_async_msghdr *kmsg,
856 struct io_br_sel *sel, bool mshot_finished,
857 unsigned issue_flags)
858 {
859 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
860 unsigned int cflags = 0;
861
862 if (kmsg->msg.msg_inq > 0)
863 cflags |= IORING_CQE_F_SOCK_NONEMPTY;
864
865 if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
866 /*
867 * If sr->len hits zero, the limit has been reached. Mark
868 * mshot as finished, and flag MSHOT_DONE as well to prevent
869 * a potential bundle from being retried.
870 */
871 sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
872 if (!sr->mshot_total_len) {
873 sr->flags |= IORING_RECV_MSHOT_DONE;
874 mshot_finished = true;
875 }
876 }
877
878 if (sr->flags & IORING_RECVSEND_BUNDLE) {
879 size_t this_ret = sel->val - sr->done_io;
880
881 cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
882 if (sr->flags & IORING_RECV_RETRY)
883 cflags = req->cqe.flags | (cflags & CQE_F_MASK);
884 if (sr->mshot_len && sel->val >= sr->mshot_len)
885 sr->flags |= IORING_RECV_MSHOT_CAP;
886 /* bundle with no more immediate buffers, we're done */
887 if (req->flags & REQ_F_BL_EMPTY)
888 goto finish;
889 /*
890 * If more is available AND it was a full transfer, retry and
891 * append to this one
892 */
893 if (!(sr->flags & IORING_RECV_NO_RETRY) &&
894 kmsg->msg.msg_inq > 1 && this_ret > 0 &&
895 !iov_iter_count(&kmsg->msg.msg_iter)) {
896 req->cqe.flags = cflags & ~CQE_F_MASK;
897 sr->len = kmsg->msg.msg_inq;
898 sr->done_io += this_ret;
899 sr->flags |= IORING_RECV_RETRY;
900 return false;
901 }
902 } else {
903 cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
904 }
905
906 /*
907 * Fill CQE for this receive and see if we should keep trying to
908 * receive from this socket.
909 */
910 if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
911 io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
912 sel->val = IOU_RETRY;
913 io_mshot_prep_retry(req, kmsg);
914 /* Known not-empty or unknown state, retry */
915 if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
916 if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
917 !(sr->flags & IORING_RECV_MSHOT_CAP)) {
918 return false;
919 }
920 /* mshot retries exceeded, force a requeue */
921 sr->nr_multishot_loops = 0;
922 sr->flags &= ~IORING_RECV_MSHOT_CAP;
923 if (issue_flags & IO_URING_F_MULTISHOT)
924 sel->val = IOU_REQUEUE;
925 }
926 return true;
927 }
928
929 /* Finish the request / stop multishot. */
930 finish:
931 io_req_set_res(req, sel->val, cflags);
932 sel->val = IOU_COMPLETE;
933 io_req_msg_cleanup(req, issue_flags);
934 return true;
935 }
936
/*
 * Carve the multishot recvmsg header out of the front of a selected
 * buffer. The header spans io_uring_recvmsg_out, the name area and the
 * control area; the payload starts right behind it. On success *buf and
 * *len describe the payload area and sr->buf keeps the buffer start.
 *
 * Returns 0 on success or -EFAULT if the buffer cannot hold the header.
 */
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long base = (unsigned long) *buf;
	unsigned long hdr_len;

	hdr_len = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		  kmsg->controllen;
	if (*len < hdr_len)
		return -EFAULT;

	if (kmsg->controllen) {
		/* control data occupies the tail end of the header */
		unsigned long control = base + hdr_len - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (base + hdr_len);
	*len -= hdr_len;
	kmsg->payloadlen = *len;
	return 0;
}
961
962 struct io_recvmsg_multishot_hdr {
963 struct io_uring_recvmsg_out msg;
964 struct sockaddr_storage addr;
965 };
966
/*
 * One multishot recvmsg receive: do the receive, then write the
 * io_uring_recvmsg_out header plus the (possibly truncated) source address
 * to the start of the selected buffer.
 *
 * *finished is set when the receive returned <= 0 or the header copy
 * faulted. Returns a negative error, or the number of buffer bytes
 * accounted for: the full header area plus the payload length capped at
 * kmsg->payloadlen.
 */
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	struct io_recvmsg_multishot_hdr hdr;
	int copy_len;
	int err;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	/*
	 * payloadlen reports the length before it is capped below, and
	 * namelen the length before truncation:
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg = (struct io_uring_recvmsg_out) {
		.namelen = kmsg->msg.msg_namelen,
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.payloadlen = err,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	/* never copy more of the address than the name area can hold */
	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen <= kmsg->namelen)
		copy_len += kmsg->msg.msg_namelen;
	else
		copy_len += kmsg->namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
	       kmsg->controllen + err;
}
1020
/*
 * Issue handler for recvmsg style requests, single shot or multishot.
 *
 * Fails early with -ENOTSOCK when req->file is not a socket and with -EAGAIN
 * when IORING_RECVSEND_POLL_FIRST is set but the request has not been polled
 * yet. With buffer selection enabled a provided buffer is picked on every
 * pass (-ENOBUFS if none is available). A would-block result on a nonblocking
 * issue recycles the buffer and returns IOU_RETRY. Otherwise the value left
 * in sel.val by io_recv_finish() is returned.
 */
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel = { };
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	/* caller asked to wait for a poll trigger before the first attempt */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

	/* re-entered for as long as io_recv_finish() returns false */
retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		size_t len = sr->len;

		sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel.addr)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			/* may adjust sel.addr and len; give the buffer back on error */
			ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
			if (ret) {
				io_kbuf_recycle(req, sel.buf_list, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		/* short receive that may be retried: remember the progress made */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		/* MSG_WAITALL was requested but the result got truncated */
		req_set_fail(req);
	}

	/* fold in bytes accounted by earlier partial receives */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}
1107
/*
 * Pick the provided buffer(s) a receive will land in and point
 * kmsg->msg.msg_iter at them.
 *
 * On entry a non-zero sel->val is used as the length limit for the selection.
 * Bundle requests issued with the ring locked peek several buffers through
 * io_buffers_peek(); everything else takes a single buffer through
 * io_buffer_select().
 *
 * Returns 0 on success, -ENOBUFS when no buffer could be selected, or the
 * negative error reported by io_buffers_peek() / import_ubuf().
 */
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      struct io_br_sel *sel, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		/* start out with the single inline iovec */
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
			.buf_group = sr->buf_group,
		};

		/* an iovec array is already attached to kmsg, hand that over instead */
		if (kmsg->vec.iovec) {
			arg.nr_iovs = kmsg->vec.nr;
			arg.iovs = kmsg->vec.iovec;
			arg.mode |= KBUF_MODE_FREE;
		}

		/* explicit limit wins, otherwise cap by the queued byte count */
		if (sel->val)
			arg.max_len = sel->val;
		else if (kmsg->msg.msg_inq > 1)
			arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);

		/* if mshot limited, ensure we don't go over */
		if (sr->flags & IORING_RECV_MSHOT_LIM)
			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
		ret = io_buffers_peek(req, &arg, sel);
		if (unlikely(ret < 0))
			return ret;

		/* arg.iovs was replaced by the peek: record it in kmsg and flag cleanup */
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
			kmsg->vec.nr = ret;
			kmsg->vec.iovec = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		if (arg.partial_map)
			sr->flags |= IORING_RECV_PARTIAL_MAP;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
				arg.out_len);
	} else {
		size_t len = sel->val;

		*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
		if (!sel->addr)
			return -ENOBUFS;
		sr->buf = sel->addr;
		sr->len = len;
		/* also the landing spot for the single-iovec bundle case above */
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}
1179
/*
 * Issue handler for plain recv requests, single shot or multishot.
 *
 * Returns -EAGAIN when IORING_RECVSEND_POLL_FIRST is set and the request has
 * not been polled yet, -ENOTSOCK when req->file is not a socket, IOU_RETRY
 * when the receive would block on a nonblocking issue, the result of
 * io_net_kbuf_recyle() for a retryable short receive, and otherwise the value
 * io_recv_finish() leaves in sel.val.
 */
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct io_br_sel sel;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished;

	/* caller asked to wait for a poll trigger before the first attempt */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

	/* re-entered for as long as io_recv_finish() returns false */
retry_multishot:
	sel.buf_list = NULL;
	if (io_do_buffer_select(req)) {
		/* sel.val carries the length limit into the selection helper */
		sel.val = sr->len;
		ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
		if (unlikely(ret < 0)) {
			kmsg->msg.msg_inq = -1;
			/* selection failed: mark the request failed and finish with ret */
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	/* with MSG_WAITALL anything short of the full iterator counts as partial */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			io_kbuf_recycle(req, sel.buf_list, issue_flags);
			return IOU_RETRY;
		}
		/* retryable short receive: advance past what has been received */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		/* also entered directly when buffer selection failed above */
out_free:
		req_set_fail(req);
	}

	/* a non-positive result ends a multishot sequence */
	mshot_finished = ret <= 0;
	/* fold in bytes accounted by earlier partial receives */
	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, sel.buf_list, issue_flags);

	sel.val = ret;
	if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
		goto retry_multishot;

	return sel.val;
}
1255
io_recvzc_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1256 int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1257 {
1258 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1259 unsigned ifq_idx;
1260
1261 if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
1262 return -EINVAL;
1263
1264 ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
1265 zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
1266 if (!zc->ifq)
1267 return -EINVAL;
1268
1269 zc->len = READ_ONCE(sqe->len);
1270 zc->flags = READ_ONCE(sqe->ioprio);
1271 if (READ_ONCE(sqe->msg_flags))
1272 return -EINVAL;
1273 if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
1274 return -EINVAL;
1275 /* multishot required */
1276 if (!(zc->flags & IORING_RECV_MULTISHOT))
1277 return -EINVAL;
1278 /* All data completions are posted as aux CQEs. */
1279 req->flags |= REQ_F_APOLL_MULTISHOT;
1280
1281 return 0;
1282 }
1283
io_recvzc(struct io_kiocb * req,unsigned int issue_flags)1284 int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
1285 {
1286 struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1287 struct socket *sock;
1288 unsigned int len;
1289 int ret;
1290
1291 if (!(req->flags & REQ_F_POLLED) &&
1292 (zc->flags & IORING_RECVSEND_POLL_FIRST))
1293 return -EAGAIN;
1294
1295 sock = sock_from_file(req->file);
1296 if (unlikely(!sock))
1297 return -ENOTSOCK;
1298
1299 len = zc->len;
1300 ret = io_zcrx_recv(req, zc->ifq, sock, 0, issue_flags, &zc->len);
1301 if (len && zc->len == 0) {
1302 io_req_set_res(req, 0, 0);
1303
1304 return IOU_COMPLETE;
1305 }
1306 if (unlikely(ret <= 0) && ret != -EAGAIN) {
1307 if (ret == -ERESTARTSYS)
1308 ret = -EINTR;
1309 if (ret == IOU_REQUEUE)
1310 return IOU_REQUEUE;
1311
1312 req_set_fail(req);
1313 io_req_set_res(req, ret, 0);
1314 return IOU_COMPLETE;
1315 }
1316 return IOU_RETRY;
1317 }
1318
io_send_zc_cleanup(struct io_kiocb * req)1319 void io_send_zc_cleanup(struct io_kiocb *req)
1320 {
1321 struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1322 struct io_async_msghdr *io = req->async_data;
1323
1324 if (req_has_async_data(req))
1325 io_netmsg_iovec_free(io);
1326 if (zc->notif) {
1327 io_notif_flush(zc->notif);
1328 zc->notif = NULL;
1329 }
1330 }
1331
/*
 * sqe->ioprio flags understood by the zero-copy send prep handler.
 * IO_ZC_FLAGS_COMMON are the ones needing no further inspection there;
 * anything outside IO_ZC_FLAGS_VALID makes prep fail with -EINVAL.
 */
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
				IORING_SEND_VECTORIZED)
1335
/*
 * Prepare a zero-copy send (IORING_OP_SEND_ZC) or sendmsg (any other opcode
 * routed here) request from its SQE.
 *
 * Allocates the notification request that is attached to the send, validates
 * the flags passed in sqe->ioprio, allocates the async message header and
 * runs the opcode specific setup. For requests not using a fixed buffer the
 * iterator byte count is additionally accounted via io_notif_account_mem().
 *
 * Returns 0 on success, -EINVAL for unsupported SQE contents, -ENOMEM when
 * an allocation fails, or the error of the setup / accounting helpers.
 */
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *iomsg;
	struct io_kiocb *notif;
	u64 user_data;
	int ret;

	zc->done_io = 0;

	if (unlikely(READ_ONCE(sqe->__pad2[0])))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	/* notification user_data: sqe->addr3 if non-zero, else the request's own */
	user_data = READ_ONCE(sqe->addr3);
	if (!user_data)
		user_data = req->cqe.user_data;

	notif->cqe.user_data = user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	/*
	 * From here on the notif is attached, so the request is flagged as
	 * needing cleanup before any of the error returns below.
	 */
	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;

	zc->flags = READ_ONCE(sqe->ioprio);
	/* only look closer when something beyond the common flags is set */
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	req->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	if (io_is_compat(ctx))
		zc->msg_flags |= MSG_CMSG_COMPAT;

	iomsg = io_msg_alloc_async(req);
	if (unlikely(!iomsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_SEND_ZC) {
		ret = io_send_setup(req, sqe);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		ret = io_sendmsg_setup(req, sqe);
	}
	if (unlikely(ret))
		return ret;

	/* pick the skb fill callback matching the kind of source buffer */
	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
	}
	iomsg->msg.sg_from_iter = io_sg_from_iter;
	return 0;
}
1408
/*
 * sg_from_iter callback installed for zero-copy sends that do not use a
 * fixed buffer: downgrade the skb from managed frag handling, then let
 * zerocopy_fill_skb_from_iter() do the fill and pass its result back.
 */
static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	int ret;

	skb_zcopy_downgrade_managed(skb);
	ret = zerocopy_fill_skb_from_iter(skb, from, length);

	return ret;
}
1415
/*
 * sg_from_iter callback installed for zero-copy sends using a fixed buffer.
 *
 * Walks the bvec array behind @from and attaches up to @length bytes of it
 * to @skb as page frags, then advances @from by what was attached and
 * updates the skb length and truesize accounting.
 *
 * Returns 0 on success, -EMSGSIZE if data was left over once the frag slots
 * ran out, or the result of zerocopy_fill_skb_from_iter() when the skb
 * already carries frags but is not a managed one.
 */
static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	/* an skb without frags becomes managed; a non-managed one with frags falls back */
	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	/* local cursor over from->bvec, starting at the iterator's current offset */
	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	/* one frag per bvec segment until the data or the frag slots run out */
	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	/* data left over means the loop stopped on the frag limit */
	if (bi.bi_size)
		ret = -EMSGSIZE;

	/* publish the frag count and move the caller's iterator forward */
	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}
1458
io_send_zc_import(struct io_kiocb * req,struct io_async_msghdr * kmsg,unsigned int issue_flags)1459 static int io_send_zc_import(struct io_kiocb *req,
1460 struct io_async_msghdr *kmsg,
1461 unsigned int issue_flags)
1462 {
1463 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1464 struct io_kiocb *notif = sr->notif;
1465 int ret;
1466
1467 WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));
1468
1469 notif->buf_index = req->buf_index;
1470
1471 if (!(sr->flags & IORING_SEND_VECTORIZED)) {
1472 ret = io_import_reg_buf(notif, &kmsg->msg.msg_iter,
1473 (u64)(uintptr_t)sr->buf, sr->len,
1474 ITER_SOURCE, issue_flags);
1475 } else {
1476 unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
1477
1478 ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
1479 notif, &kmsg->vec, uvec_segs,
1480 issue_flags);
1481 }
1482
1483 if (unlikely(ret))
1484 return ret;
1485 req->flags &= ~REQ_F_IMPORT_BUFFER;
1486 return 0;
1487 }
1488
/*
 * Issue handler for zero-copy send / sendmsg.
 *
 * Returns -ENOTSOCK when req->file is not a socket, -EOPNOTSUPP when the
 * socket does not have SOCK_SUPPORT_ZC set, and -EAGAIN when the request
 * should be retried: POLL_FIRST not yet satisfied, a would-block result on a
 * nonblocking issue, or a retryable short send. Otherwise the request is
 * completed with IORING_CQE_F_MORE set in the CQE flags and IOU_COMPLETE is
 * returned.
 */
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	/* the buffer import is pending; io_send_zc_import() clears the flag on success */
	if (req->flags & REQ_F_IMPORT_BUFFER) {
		ret = io_send_zc_import(req, kmsg, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	/* with MSG_WAITALL anything short of the full iterator counts as partial */
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;

	if (req->opcode == IORING_OP_SEND_ZC) {
		msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
		kmsg->msg.msg_flags = msg_flags;
		ret = sock_sendmsg(sock, &kmsg->msg);
	} else {
		kmsg->msg.msg_control_user = sr->msg_control;
		ret = __sys_sendmsg_sock(sock, &kmsg->msg, msg_flags);
	}

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		/* retryable short send: remember the progress and try again */
		if (ret > 0 && io_net_retry(sock, sr->msg_flags)) {
			sr->done_io += ret;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	/* fold in bytes accounted by earlier partial sends */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		sr->notif = NULL;
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_COMPLETE;
}
1559
io_sendrecv_fail(struct io_kiocb * req)1560 void io_sendrecv_fail(struct io_kiocb *req)
1561 {
1562 struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1563
1564 if (sr->done_io)
1565 req->cqe.res = sr->done_io;
1566
1567 if ((req->flags & REQ_F_NEED_CLEANUP) &&
1568 (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1569 req->cqe.flags |= IORING_CQE_F_MORE;
1570 }
1571
/*
 * sqe->ioprio flags accepted by io_accept_prep(); any other bit makes prep
 * fail with -EINVAL.
 */
#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)
1574
io_accept_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1575 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1576 {
1577 struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1578
1579 if (sqe->len || sqe->buf_index)
1580 return -EINVAL;
1581
1582 accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1583 accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1584 accept->flags = READ_ONCE(sqe->accept_flags);
1585 accept->nofile = rlimit(RLIMIT_NOFILE);
1586 accept->iou_flags = READ_ONCE(sqe->ioprio);
1587 if (accept->iou_flags & ~ACCEPT_FLAGS)
1588 return -EINVAL;
1589
1590 accept->file_slot = READ_ONCE(sqe->file_index);
1591 if (accept->file_slot) {
1592 if (accept->flags & SOCK_CLOEXEC)
1593 return -EINVAL;
1594 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
1595 accept->file_slot != IORING_FILE_INDEX_ALLOC)
1596 return -EINVAL;
1597 }
1598 if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1599 return -EINVAL;
1600 if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1601 accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1602 if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
1603 req->flags |= REQ_F_APOLL_MULTISHOT;
1604 if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
1605 req->flags |= REQ_F_NOWAIT;
1606 return 0;
1607 }
1608
/*
 * Issue handler for accept, single shot or multishot.
 *
 * For non-fixed requests an fd is reserved up front and installed once
 * do_accept() returns a file; fixed requests install the file into the
 * requested slot instead. Multishot requests post each successful result as
 * a CQE with IORING_CQE_F_MORE and either loop or return IOU_RETRY.
 *
 * Returns -EAGAIN when IORING_ACCEPT_POLL_FIRST is set and the request has
 * not been polled yet, the error from reserving an fd, IOU_RETRY when the
 * request should be retried, or IOU_COMPLETE once the result has been set.
 */
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	/* reset per attempt; is_empty still being -1 afterwards is checked below */
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		/* give back the fd reserved above */
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		/* would block: retry later unless the caller asked not to wait */
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
			return IOU_RETRY;

		if (ret == -ERESTARTSYS)
			ret = -EINTR;
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	/*
	 * Multishot success with the CQE posted: accept again right away if
	 * the socket was reported non-empty or is_empty was left at -1,
	 * otherwise ask for a retry.
	 */
	if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
	    io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		return IOU_RETRY;
	}

	io_req_set_res(req, ret, cflags);
	if (ret < 0)
		req_set_fail(req);
	return IOU_COMPLETE;
}
1669
io_socket_bpf_populate(struct io_uring_bpf_ctx * bctx,struct io_kiocb * req)1670 void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
1671 {
1672 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1673
1674 bctx->socket.family = sock->domain;
1675 bctx->socket.type = sock->type;
1676 bctx->socket.protocol = sock->protocol;
1677 }
1678
io_socket_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1679 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1680 {
1681 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1682
1683 if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1684 return -EINVAL;
1685
1686 sock->domain = READ_ONCE(sqe->fd);
1687 sock->type = READ_ONCE(sqe->off);
1688 sock->protocol = READ_ONCE(sqe->len);
1689 sock->file_slot = READ_ONCE(sqe->file_index);
1690 sock->nofile = rlimit(RLIMIT_NOFILE);
1691
1692 sock->flags = sock->type & ~SOCK_TYPE_MASK;
1693 if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1694 return -EINVAL;
1695 if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1696 return -EINVAL;
1697 return 0;
1698 }
1699
io_socket(struct io_kiocb * req,unsigned int issue_flags)1700 int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1701 {
1702 struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1703 bool fixed = !!sock->file_slot;
1704 struct file *file;
1705 int ret, fd;
1706
1707 if (!fixed) {
1708 fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1709 if (unlikely(fd < 0))
1710 return fd;
1711 }
1712 file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1713 if (IS_ERR(file)) {
1714 if (!fixed)
1715 put_unused_fd(fd);
1716 ret = PTR_ERR(file);
1717 if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1718 return -EAGAIN;
1719 if (ret == -ERESTARTSYS)
1720 ret = -EINTR;
1721 req_set_fail(req);
1722 } else if (!fixed) {
1723 fd_install(fd, file);
1724 ret = fd;
1725 } else {
1726 ret = io_fixed_fd_install(req, issue_flags, file,
1727 sock->file_slot);
1728 }
1729 io_req_set_res(req, ret, 0);
1730 return IOU_COMPLETE;
1731 }
1732
io_connect_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1733 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1734 {
1735 struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1736 struct io_async_msghdr *io;
1737
1738 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1739 return -EINVAL;
1740
1741 conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1742 conn->addr_len = READ_ONCE(sqe->addr2);
1743 conn->in_progress = conn->seen_econnaborted = false;
1744
1745 io = io_msg_alloc_async(req);
1746 if (unlikely(!io))
1747 return -ENOMEM;
1748
1749 return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
1750 }
1751
/*
 * Issue handler for connect.
 *
 * On a nonblocking issue, -EAGAIN and -EINPROGRESS from the connect attempt,
 * as well as the first -ECONNABORTED, turn into an -EAGAIN return so the
 * request is tried again later. All other outcomes complete the request and
 * return IOU_COMPLETE, with negative results marking the request as failed.
 */
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	/*
	 * An earlier attempt returned -EINPROGRESS: if the socket now signals
	 * an error condition, go straight to fetching the socket error.
	 */
	if (connect->in_progress) {
		struct poll_table_struct pt = { ._key = EPOLLERR };

		if (vfs_poll(req->file, &pt) & EPOLLERR)
			goto get_sock_err;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			/* only the first -ECONNABORTED is retried */
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN) {
get_sock_err:
			ret = sock_error(sock_from_file(req->file)->sk);
		}
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
1803
1804 /*
1805 * Check if bind request would potentially end up with filename_create(),
1806 * which in turn end up in mnt_want_write() which will grab the fs
1807 * percpu start write sem. This can trigger a lockdep warning.
1808 */
io_bind_file_create(const struct io_async_msghdr * io,int addr_len)1809 static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len)
1810 {
1811 const struct sockaddr_un *sun;
1812
1813 if (io->addr.ss_family != AF_UNIX)
1814 return 0;
1815 if (addr_len <= offsetof(struct sockaddr_un, sun_path))
1816 return 0;
1817 sun = (const struct sockaddr_un *) &io->addr;
1818 return sun->sun_path[0] != '\0';
1819 }
1820
io_bind_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1821 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1822 {
1823 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1824 struct sockaddr __user *uaddr;
1825 struct io_async_msghdr *io;
1826 int ret;
1827
1828 if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1829 return -EINVAL;
1830
1831 uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1832 bind->addr_len = READ_ONCE(sqe->addr2);
1833
1834 io = io_msg_alloc_async(req);
1835 if (unlikely(!io))
1836 return -ENOMEM;
1837 ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
1838 if (unlikely(ret))
1839 return ret;
1840 if (io_bind_file_create(io, bind->addr_len))
1841 req->flags |= REQ_F_FORCE_ASYNC;
1842 return 0;
1843 }
1844
io_bind(struct io_kiocb * req,unsigned int issue_flags)1845 int io_bind(struct io_kiocb *req, unsigned int issue_flags)
1846 {
1847 struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1848 struct io_async_msghdr *io = req->async_data;
1849 struct socket *sock;
1850 int ret;
1851
1852 sock = sock_from_file(req->file);
1853 if (unlikely(!sock))
1854 return -ENOTSOCK;
1855
1856 ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
1857 if (ret < 0)
1858 req_set_fail(req);
1859 io_req_set_res(req, ret, 0);
1860 return 0;
1861 }
1862
io_listen_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)1863 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1864 {
1865 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1866
1867 if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
1868 return -EINVAL;
1869
1870 listen->backlog = READ_ONCE(sqe->len);
1871 return 0;
1872 }
1873
io_listen(struct io_kiocb * req,unsigned int issue_flags)1874 int io_listen(struct io_kiocb *req, unsigned int issue_flags)
1875 {
1876 struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1877 struct socket *sock;
1878 int ret;
1879
1880 sock = sock_from_file(req->file);
1881 if (unlikely(!sock))
1882 return -ENOTSOCK;
1883
1884 ret = __sys_listen_socket(sock, listen->backlog);
1885 if (ret < 0)
1886 req_set_fail(req);
1887 io_req_set_res(req, ret, 0);
1888 return 0;
1889 }
1890
io_netmsg_cache_free(const void * entry)1891 void io_netmsg_cache_free(const void *entry)
1892 {
1893 struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
1894
1895 io_vec_free(&kmsg->vec);
1896 kfree(kmsg);
1897 }
1898