xref: /linux/io_uring/net.c (revision e814f3fd16acfb7f9966773953de8f740a1e3202)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/slab.h>
6 #include <linux/net.h>
7 #include <linux/compat.h>
8 #include <net/compat.h>
9 #include <linux/io_uring.h>
10 
11 #include <uapi/linux/io_uring.h>
12 
13 #include "io_uring.h"
14 #include "kbuf.h"
15 #include "alloc_cache.h"
16 #include "net.h"
17 #include "notif.h"
18 #include "rsrc.h"
19 
20 #if defined(CONFIG_NET)
21 struct io_shutdown {
22 	struct file			*file;
23 	int				how;
24 };
25 
26 struct io_accept {
27 	struct file			*file;
28 	struct sockaddr __user		*addr;
29 	int __user			*addr_len;
30 	int				flags;
31 	int				iou_flags;
32 	u32				file_slot;
33 	unsigned long			nofile;
34 };
35 
36 struct io_socket {
37 	struct file			*file;
38 	int				domain;
39 	int				type;
40 	int				protocol;
41 	int				flags;
42 	u32				file_slot;
43 	unsigned long			nofile;
44 };
45 
46 struct io_connect {
47 	struct file			*file;
48 	struct sockaddr __user		*addr;
49 	int				addr_len;
50 	bool				in_progress;
51 	bool				seen_econnaborted;
52 };
53 
54 struct io_bind {
55 	struct file			*file;
56 	int				addr_len;
57 };
58 
59 struct io_listen {
60 	struct file			*file;
61 	int				backlog;
62 };
63 
64 struct io_sr_msg {
65 	struct file			*file;
66 	union {
67 		struct compat_msghdr __user	*umsg_compat;
68 		struct user_msghdr __user	*umsg;
69 		void __user			*buf;
70 	};
71 	int				len;
72 	unsigned			done_io;
73 	unsigned			msg_flags;
74 	unsigned			nr_multishot_loops;
75 	u16				flags;
76 	/* initialised and used only by !msg send variants */
77 	u16				buf_group;
78 	u16				buf_index;
79 	void __user			*msg_control;
80 	/* used only for send zerocopy */
81 	struct io_kiocb 		*notif;
82 };
83 
84 /*
85  * Number of times we'll retry a receive if there's more data pending. If
86  * we exceed this limit, add the request to the back of the queue and retry
87  * from there. This helps maintain fairness between flooding clients.
88  */
89 #define MULTISHOT_MAX_RETRY	32
90 
91 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
92 {
93 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
94 
95 	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
96 		     sqe->buf_index || sqe->splice_fd_in))
97 		return -EINVAL;
98 
99 	shutdown->how = READ_ONCE(sqe->len);
100 	req->flags |= REQ_F_FORCE_ASYNC;
101 	return 0;
102 }
103 
104 int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
105 {
106 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
107 	struct socket *sock;
108 	int ret;
109 
110 	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
111 
112 	sock = sock_from_file(req->file);
113 	if (unlikely(!sock))
114 		return -ENOTSOCK;
115 
116 	ret = __sys_shutdown_sock(sock, shutdown->how);
117 	io_req_set_res(req, ret, 0);
118 	return IOU_OK;
119 }
120 
121 static bool io_net_retry(struct socket *sock, int flags)
122 {
123 	if (!(flags & MSG_WAITALL))
124 		return false;
125 	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
126 }
127 
128 static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
129 {
130 	if (kmsg->free_iov) {
131 		kfree(kmsg->free_iov);
132 		kmsg->free_iov_nr = 0;
133 		kmsg->free_iov = NULL;
134 	}
135 }
136 
137 static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
138 {
139 	struct io_async_msghdr *hdr = req->async_data;
140 	struct iovec *iov;
141 
142 	/* can't recycle, ensure we free the iovec if we have one */
143 	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
144 		io_netmsg_iovec_free(hdr);
145 		return;
146 	}
147 
148 	/* Let normal cleanup path reap it if we fail adding to the cache */
149 	iov = hdr->free_iov;
150 	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
151 		if (iov)
152 			kasan_mempool_poison_object(iov);
153 		req->async_data = NULL;
154 		req->flags &= ~REQ_F_ASYNC_DATA;
155 	}
156 }
157 
158 static void io_msg_async_data_init(void *obj)
159 {
160 	struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj;
161 
162 	hdr->free_iov = NULL;
163 	hdr->free_iov_nr = 0;
164 }
165 
166 static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
167 {
168 	struct io_ring_ctx *ctx = req->ctx;
169 	struct io_async_msghdr *hdr;
170 
171 	hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req,
172 					io_msg_async_data_init);
173 	if (!hdr)
174 		return NULL;
175 
176 	/* If the async data was cached, we might have an iov cached inside. */
177 	if (hdr->free_iov) {
178 		kasan_mempool_unpoison_object(hdr->free_iov,
179 					      hdr->free_iov_nr * sizeof(struct iovec));
180 		req->flags |= REQ_F_NEED_CLEANUP;
181 	}
182 	return hdr;
183 }
184 
185 /* assign new iovec to kmsg, if we need to */
186 static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
187 			     struct iovec *iov)
188 {
189 	if (iov) {
190 		req->flags |= REQ_F_NEED_CLEANUP;
191 		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
192 		if (kmsg->free_iov)
193 			kfree(kmsg->free_iov);
194 		kmsg->free_iov = iov;
195 	}
196 	return 0;
197 }
198 
199 static inline void io_mshot_prep_retry(struct io_kiocb *req,
200 				       struct io_async_msghdr *kmsg)
201 {
202 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
203 
204 	req->flags &= ~REQ_F_BL_EMPTY;
205 	sr->done_io = 0;
206 	sr->len = 0; /* get from the provided buffer */
207 	req->buf_index = sr->buf_group;
208 }
209 
210 #ifdef CONFIG_COMPAT
211 static int io_compat_msg_copy_hdr(struct io_kiocb *req,
212 				  struct io_async_msghdr *iomsg,
213 				  struct compat_msghdr *msg, int ddir)
214 {
215 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
216 	struct compat_iovec __user *uiov;
217 	struct iovec *iov;
218 	int ret, nr_segs;
219 
220 	if (iomsg->free_iov) {
221 		nr_segs = iomsg->free_iov_nr;
222 		iov = iomsg->free_iov;
223 	} else {
224 		iov = &iomsg->fast_iov;
225 		nr_segs = 1;
226 	}
227 
228 	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
229 		return -EFAULT;
230 
231 	uiov = compat_ptr(msg->msg_iov);
232 	if (req->flags & REQ_F_BUFFER_SELECT) {
233 		compat_ssize_t clen;
234 
235 		if (msg->msg_iovlen == 0) {
236 			sr->len = iov->iov_len = 0;
237 			iov->iov_base = NULL;
238 		} else if (msg->msg_iovlen > 1) {
239 			return -EINVAL;
240 		} else {
241 			if (!access_ok(uiov, sizeof(*uiov)))
242 				return -EFAULT;
243 			if (__get_user(clen, &uiov->iov_len))
244 				return -EFAULT;
245 			if (clen < 0)
246 				return -EINVAL;
247 			sr->len = clen;
248 		}
249 
250 		return 0;
251 	}
252 
253 	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
254 				nr_segs, &iov, &iomsg->msg.msg_iter, true);
255 	if (unlikely(ret < 0))
256 		return ret;
257 
258 	return io_net_vec_assign(req, iomsg, iov);
259 }
260 #endif
261 
262 static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
263 			   struct user_msghdr *msg, int ddir)
264 {
265 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
266 	struct user_msghdr __user *umsg = sr->umsg;
267 	struct iovec *iov;
268 	int ret, nr_segs;
269 
270 	if (iomsg->free_iov) {
271 		nr_segs = iomsg->free_iov_nr;
272 		iov = iomsg->free_iov;
273 	} else {
274 		iov = &iomsg->fast_iov;
275 		nr_segs = 1;
276 	}
277 
278 	if (!user_access_begin(umsg, sizeof(*umsg)))
279 		return -EFAULT;
280 
281 	ret = -EFAULT;
282 	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
283 	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
284 	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
285 	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
286 	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
287 	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
288 	msg->msg_flags = 0;
289 
290 	if (req->flags & REQ_F_BUFFER_SELECT) {
291 		if (msg->msg_iovlen == 0) {
292 			sr->len = iov->iov_len = 0;
293 			iov->iov_base = NULL;
294 		} else if (msg->msg_iovlen > 1) {
295 			ret = -EINVAL;
296 			goto ua_end;
297 		} else {
298 			/* we only need the length for provided buffers */
299 			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
300 				goto ua_end;
301 			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
302 					ua_end);
303 			sr->len = iov->iov_len;
304 		}
305 		ret = 0;
306 ua_end:
307 		user_access_end();
308 		return ret;
309 	}
310 
311 	user_access_end();
312 	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
313 				&iov, &iomsg->msg.msg_iter, false);
314 	if (unlikely(ret < 0))
315 		return ret;
316 
317 	return io_net_vec_assign(req, iomsg, iov);
318 }
319 
320 static int io_sendmsg_copy_hdr(struct io_kiocb *req,
321 			       struct io_async_msghdr *iomsg)
322 {
323 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
324 	struct user_msghdr msg;
325 	int ret;
326 
327 	iomsg->msg.msg_name = &iomsg->addr;
328 	iomsg->msg.msg_iter.nr_segs = 0;
329 
330 #ifdef CONFIG_COMPAT
331 	if (unlikely(req->ctx->compat)) {
332 		struct compat_msghdr cmsg;
333 
334 		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
335 		if (unlikely(ret))
336 			return ret;
337 
338 		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
339 	}
340 #endif
341 
342 	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
343 	if (unlikely(ret))
344 		return ret;
345 
346 	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
347 
348 	/* save msg_control as sys_sendmsg() overwrites it */
349 	sr->msg_control = iomsg->msg.msg_control_user;
350 	return ret;
351 }
352 
353 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
354 {
355 	struct io_async_msghdr *io = req->async_data;
356 
357 	io_netmsg_iovec_free(io);
358 }
359 
360 static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
361 {
362 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
363 	struct io_async_msghdr *kmsg = req->async_data;
364 	void __user *addr;
365 	u16 addr_len;
366 	int ret;
367 
368 	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
369 
370 	if (READ_ONCE(sqe->__pad3[0]))
371 		return -EINVAL;
372 
373 	kmsg->msg.msg_name = NULL;
374 	kmsg->msg.msg_namelen = 0;
375 	kmsg->msg.msg_control = NULL;
376 	kmsg->msg.msg_controllen = 0;
377 	kmsg->msg.msg_ubuf = NULL;
378 
379 	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
380 	addr_len = READ_ONCE(sqe->addr_len);
381 	if (addr) {
382 		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
383 		if (unlikely(ret < 0))
384 			return ret;
385 		kmsg->msg.msg_name = &kmsg->addr;
386 		kmsg->msg.msg_namelen = addr_len;
387 	}
388 	if (!io_do_buffer_select(req)) {
389 		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
390 				  &kmsg->msg.msg_iter);
391 		if (unlikely(ret < 0))
392 			return ret;
393 	}
394 	return 0;
395 }
396 
397 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
398 {
399 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
400 	struct io_async_msghdr *kmsg = req->async_data;
401 	int ret;
402 
403 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
404 
405 	ret = io_sendmsg_copy_hdr(req, kmsg);
406 	if (!ret)
407 		req->flags |= REQ_F_NEED_CLEANUP;
408 	return ret;
409 }
410 
411 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
412 
413 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
414 {
415 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
416 
417 	sr->done_io = 0;
418 
419 	if (req->opcode != IORING_OP_SEND) {
420 		if (sqe->addr2 || sqe->file_index)
421 			return -EINVAL;
422 	}
423 
424 	sr->len = READ_ONCE(sqe->len);
425 	sr->flags = READ_ONCE(sqe->ioprio);
426 	if (sr->flags & ~SENDMSG_FLAGS)
427 		return -EINVAL;
428 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
429 	if (sr->msg_flags & MSG_DONTWAIT)
430 		req->flags |= REQ_F_NOWAIT;
431 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
432 		if (req->opcode == IORING_OP_SENDMSG)
433 			return -EINVAL;
434 		if (!(req->flags & REQ_F_BUFFER_SELECT))
435 			return -EINVAL;
436 		sr->msg_flags |= MSG_WAITALL;
437 		sr->buf_group = req->buf_index;
438 		req->buf_list = NULL;
439 	}
440 
441 #ifdef CONFIG_COMPAT
442 	if (req->ctx->compat)
443 		sr->msg_flags |= MSG_CMSG_COMPAT;
444 #endif
445 	if (unlikely(!io_msg_alloc_async(req)))
446 		return -ENOMEM;
447 	if (req->opcode != IORING_OP_SENDMSG)
448 		return io_send_setup(req, sqe);
449 	return io_sendmsg_setup(req, sqe);
450 }
451 
452 static void io_req_msg_cleanup(struct io_kiocb *req,
453 			       unsigned int issue_flags)
454 {
455 	req->flags &= ~REQ_F_NEED_CLEANUP;
456 	io_netmsg_recycle(req, issue_flags);
457 }
458 
459 /*
460  * For bundle completions, we need to figure out how many segments we consumed.
461  * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
462  * could be using an ITER_IOVEC. If the latter, and we consumed all of
463  * the segments, then it's a trivial question to answer. If we have residual
464  * data in the iter, then loop the segments to figure out how much we
465  * transferred.
466  */
467 static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
468 {
469 	struct iovec *iov;
470 	int nbufs;
471 
472 	/* no data is always zero segments, and a ubuf is always 1 segment */
473 	if (ret <= 0)
474 		return 0;
475 	if (iter_is_ubuf(&kmsg->msg.msg_iter))
476 		return 1;
477 
478 	iov = kmsg->free_iov;
479 	if (!iov)
480 		iov = &kmsg->fast_iov;
481 
482 	/* if all data was transferred, it's basic pointer math */
483 	if (!iov_iter_count(&kmsg->msg.msg_iter))
484 		return iter_iov(&kmsg->msg.msg_iter) - iov;
485 
486 	/* short transfer, count segments */
487 	nbufs = 0;
488 	do {
489 		int this_len = min_t(int, iov[nbufs].iov_len, ret);
490 
491 		nbufs++;
492 		ret -= this_len;
493 	} while (ret);
494 
495 	return nbufs;
496 }
497 
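/*
 * Finishes io_send. For a bundle send, this posts a CQE for the buffers
 * just transferred and decides whether the bundle should continue with a
 * fresh set of selected buffers.
 *
 * Returns true if the request is done, or false if the bundle send should
 * be retried.
 */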
498 static inline bool io_send_finish(struct io_kiocb *req, int *ret,
499 				  struct io_async_msghdr *kmsg,
500 				  unsigned issue_flags)
501 {
502 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
503 	bool bundle_finished = *ret <= 0;
504 	unsigned int cflags;
505 
506 	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
507 		cflags = io_put_kbuf(req, *ret, issue_flags);
508 		goto finish;
509 	}
510 
511 	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);
512 
513 	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
514 		goto finish;
515 
516 	/*
517 	 * Fill CQE for this send and see if we should keep trying to
518 	 * send on this socket.
519 	 */
520 	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
521 		io_mshot_prep_retry(req, kmsg);
522 		return false;
523 	}
524 
525 	/* Otherwise stop bundle and use the current result. */
526 finish:
527 	io_req_set_res(req, *ret, cflags);
528 	*ret = IOU_OK;
529 	return true;
530 }
531 
532 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
533 {
534 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
535 	struct io_async_msghdr *kmsg = req->async_data;
536 	struct socket *sock;
537 	unsigned flags;
538 	int min_ret = 0;
539 	int ret;
540 
541 	sock = sock_from_file(req->file);
542 	if (unlikely(!sock))
543 		return -ENOTSOCK;
544 
545 	if (!(req->flags & REQ_F_POLLED) &&
546 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
547 		return -EAGAIN;
548 
549 	flags = sr->msg_flags;
550 	if (issue_flags & IO_URING_F_NONBLOCK)
551 		flags |= MSG_DONTWAIT;
552 	if (flags & MSG_WAITALL)
553 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
554 
555 	kmsg->msg.msg_control_user = sr->msg_control;
556 
557 	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
558 
559 	if (ret < min_ret) {
560 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
561 			return -EAGAIN;
562 		if (ret > 0 && io_net_retry(sock, flags)) {
563 			kmsg->msg.msg_controllen = 0;
564 			kmsg->msg.msg_control = NULL;
565 			sr->done_io += ret;
566 			req->flags |= REQ_F_BL_NO_RECYCLE;
567 			return -EAGAIN;
568 		}
569 		if (ret == -ERESTARTSYS)
570 			ret = -EINTR;
571 		req_set_fail(req);
572 	}
573 	io_req_msg_cleanup(req, issue_flags);
574 	if (ret >= 0)
575 		ret += sr->done_io;
576 	else if (sr->done_io)
577 		ret = sr->done_io;
578 	io_req_set_res(req, ret, 0);
579 	return IOU_OK;
580 }
581 
582 int io_send(struct io_kiocb *req, unsigned int issue_flags)
583 {
584 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
585 	struct io_async_msghdr *kmsg = req->async_data;
586 	struct socket *sock;
587 	unsigned flags;
588 	int min_ret = 0;
589 	int ret;
590 
591 	sock = sock_from_file(req->file);
592 	if (unlikely(!sock))
593 		return -ENOTSOCK;
594 
595 	if (!(req->flags & REQ_F_POLLED) &&
596 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
597 		return -EAGAIN;
598 
599 	flags = sr->msg_flags;
600 	if (issue_flags & IO_URING_F_NONBLOCK)
601 		flags |= MSG_DONTWAIT;
602 
603 retry_bundle:
604 	if (io_do_buffer_select(req)) {
605 		struct buf_sel_arg arg = {
606 			.iovs = &kmsg->fast_iov,
607 			.max_len = min_not_zero(sr->len, INT_MAX),
608 			.nr_iovs = 1,
609 		};
610 
611 		if (kmsg->free_iov) {
612 			arg.nr_iovs = kmsg->free_iov_nr;
613 			arg.iovs = kmsg->free_iov;
614 			arg.mode = KBUF_MODE_FREE;
615 		}
616 
617 		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
618 			arg.nr_iovs = 1;
619 		else
620 			arg.mode |= KBUF_MODE_EXPAND;
621 
622 		ret = io_buffers_select(req, &arg, issue_flags);
623 		if (unlikely(ret < 0))
624 			return ret;
625 
626 		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
627 			kmsg->free_iov_nr = ret;
628 			kmsg->free_iov = arg.iovs;
629 			req->flags |= REQ_F_NEED_CLEANUP;
630 		}
631 		sr->len = arg.out_len;
632 
633 		if (ret == 1) {
634 			sr->buf = arg.iovs[0].iov_base;
635 			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
636 						&kmsg->msg.msg_iter);
637 			if (unlikely(ret))
638 				return ret;
639 		} else {
640 			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
641 					arg.iovs, ret, arg.out_len);
642 		}
643 	}
644 
645 	/*
646 	 * If MSG_WAITALL is set, or this is a bundle send, then we need
647 	 * the full amount. If just the bundle flag is set and we do a short
648 	 * send, we complete the bundle sequence rather than continue on.
649 	 */
650 	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
651 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
652 
653 	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
654 	kmsg->msg.msg_flags = flags;
655 	ret = sock_sendmsg(sock, &kmsg->msg);
656 	if (ret < min_ret) {
657 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
658 			return -EAGAIN;
659 
660 		if (ret > 0 && io_net_retry(sock, flags)) {
661 			sr->len -= ret;
662 			sr->buf += ret;
663 			sr->done_io += ret;
664 			req->flags |= REQ_F_BL_NO_RECYCLE;
665 			return -EAGAIN;
666 		}
667 		if (ret == -ERESTARTSYS)
668 			ret = -EINTR;
669 		req_set_fail(req);
670 	}
671 	if (ret >= 0)
672 		ret += sr->done_io;
673 	else if (sr->done_io)
674 		ret = sr->done_io;
675 
676 	if (!io_send_finish(req, &ret, kmsg, issue_flags))
677 		goto retry_bundle;
678 
679 	io_req_msg_cleanup(req, issue_flags);
680 	return ret;
681 }
682 
683 static int io_recvmsg_mshot_prep(struct io_kiocb *req,
684 				 struct io_async_msghdr *iomsg,
685 				 int namelen, size_t controllen)
686 {
687 	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
688 			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
689 		int hdr;
690 
691 		if (unlikely(namelen < 0))
692 			return -EOVERFLOW;
693 		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
694 					namelen, &hdr))
695 			return -EOVERFLOW;
696 		if (check_add_overflow(hdr, controllen, &hdr))
697 			return -EOVERFLOW;
698 
699 		iomsg->namelen = namelen;
700 		iomsg->controllen = controllen;
701 		return 0;
702 	}
703 
704 	return 0;
705 }
706 
707 static int io_recvmsg_copy_hdr(struct io_kiocb *req,
708 			       struct io_async_msghdr *iomsg)
709 {
710 	struct user_msghdr msg;
711 	int ret;
712 
713 	iomsg->msg.msg_name = &iomsg->addr;
714 	iomsg->msg.msg_iter.nr_segs = 0;
715 
716 #ifdef CONFIG_COMPAT
717 	if (unlikely(req->ctx->compat)) {
718 		struct compat_msghdr cmsg;
719 
720 		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
721 		if (unlikely(ret))
722 			return ret;
723 
724 		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
725 		if (unlikely(ret))
726 			return ret;
727 
728 		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
729 						cmsg.msg_controllen);
730 	}
731 #endif
732 
733 	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
734 	if (unlikely(ret))
735 		return ret;
736 
737 	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
738 	if (unlikely(ret))
739 		return ret;
740 
741 	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
742 					msg.msg_controllen);
743 }
744 
745 static int io_recvmsg_prep_setup(struct io_kiocb *req)
746 {
747 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
748 	struct io_async_msghdr *kmsg;
749 	int ret;
750 
751 	kmsg = io_msg_alloc_async(req);
752 	if (unlikely(!kmsg))
753 		return -ENOMEM;
754 
755 	if (req->opcode == IORING_OP_RECV) {
756 		kmsg->msg.msg_name = NULL;
757 		kmsg->msg.msg_namelen = 0;
758 		kmsg->msg.msg_inq = 0;
759 		kmsg->msg.msg_control = NULL;
760 		kmsg->msg.msg_get_inq = 1;
761 		kmsg->msg.msg_controllen = 0;
762 		kmsg->msg.msg_iocb = NULL;
763 		kmsg->msg.msg_ubuf = NULL;
764 
765 		if (!io_do_buffer_select(req)) {
766 			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
767 					  &kmsg->msg.msg_iter);
768 			if (unlikely(ret))
769 				return ret;
770 		}
771 		return 0;
772 	}
773 
774 	ret = io_recvmsg_copy_hdr(req, kmsg);
775 	if (!ret)
776 		req->flags |= REQ_F_NEED_CLEANUP;
777 	return ret;
778 }
779 
780 #define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
781 			IORING_RECVSEND_BUNDLE)
782 
783 int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
784 {
785 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
786 
787 	sr->done_io = 0;
788 
789 	if (unlikely(sqe->file_index || sqe->addr2))
790 		return -EINVAL;
791 
792 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
793 	sr->len = READ_ONCE(sqe->len);
794 	sr->flags = READ_ONCE(sqe->ioprio);
795 	if (sr->flags & ~RECVMSG_FLAGS)
796 		return -EINVAL;
797 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
798 	if (sr->msg_flags & MSG_DONTWAIT)
799 		req->flags |= REQ_F_NOWAIT;
800 	if (sr->msg_flags & MSG_ERRQUEUE)
801 		req->flags |= REQ_F_CLEAR_POLLIN;
802 	if (req->flags & REQ_F_BUFFER_SELECT) {
803 		/*
804 		 * Store the buffer group for this multishot receive separately,
805 		 * as if we end up doing an io-wq based issue that selects a
806 		 * buffer, it has to be committed immediately and that will
807 		 * clear ->buf_list. This means we lose the link to the buffer
808 		 * list, and the eventual buffer put on completion then cannot
809 		 * restore it.
810 		 */
811 		sr->buf_group = req->buf_index;
812 		req->buf_list = NULL;
813 	}
814 	if (sr->flags & IORING_RECV_MULTISHOT) {
815 		if (!(req->flags & REQ_F_BUFFER_SELECT))
816 			return -EINVAL;
817 		if (sr->msg_flags & MSG_WAITALL)
818 			return -EINVAL;
819 		if (req->opcode == IORING_OP_RECV && sr->len)
820 			return -EINVAL;
821 		req->flags |= REQ_F_APOLL_MULTISHOT;
822 	}
823 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
824 		if (req->opcode == IORING_OP_RECVMSG)
825 			return -EINVAL;
826 	}
827 
828 #ifdef CONFIG_COMPAT
829 	if (req->ctx->compat)
830 		sr->msg_flags |= MSG_CMSG_COMPAT;
831 #endif
832 	sr->nr_multishot_loops = 0;
833 	return io_recvmsg_prep_setup(req);
834 }
835 
836 /*
837  * Finishes io_recv and io_recvmsg.
838  *
839  * Returns true if it is actually finished, or false if it should run
840  * again (for multishot).
841  */
842 static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
843 				  struct io_async_msghdr *kmsg,
844 				  bool mshot_finished, unsigned issue_flags)
845 {
846 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
847 	unsigned int cflags = 0;
848 
849 	if (kmsg->msg.msg_inq > 0)
850 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
851 
852 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
853 		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
854 				      issue_flags);
855 		/* bundle with no more immediate buffers, we're done */
856 		if (req->flags & REQ_F_BL_EMPTY)
857 			goto finish;
858 	} else {
859 		cflags |= io_put_kbuf(req, *ret, issue_flags);
860 	}
861 
862 	/*
863 	 * Fill CQE for this receive and see if we should keep trying to
864 	 * receive from this socket.
865 	 */
866 	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
867 	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
868 		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
869 
870 		io_mshot_prep_retry(req, kmsg);
871 		/* Known not-empty or unknown state, retry */
872 		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
873 			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
874 				return false;
875 			/* mshot retries exceeded, force a requeue */
876 			sr->nr_multishot_loops = 0;
877 			mshot_retry_ret = IOU_REQUEUE;
878 		}
879 		if (issue_flags & IO_URING_F_MULTISHOT)
880 			*ret = mshot_retry_ret;
881 		else
882 			*ret = -EAGAIN;
883 		return true;
884 	}
885 
886 	/* Finish the request / stop multishot. */
887 finish:
888 	io_req_set_res(req, *ret, cflags);
889 
890 	if (issue_flags & IO_URING_F_MULTISHOT)
891 		*ret = IOU_STOP_MULTISHOT;
892 	else
893 		*ret = IOU_OK;
894 	io_req_msg_cleanup(req, issue_flags);
895 	return true;
896 }
897 
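/*
 * For multishot recvmsg the selected buffer is laid out as: a struct
 * io_uring_recvmsg_out header, space for the source address (namelen),
 * space for control data (controllen), then the payload. Reserve the
 * fixed-size parts here and advance *buf and *len to the payload area.
 */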
898 static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
899 				     struct io_sr_msg *sr, void __user **buf,
900 				     size_t *len)
901 {
902 	unsigned long ubuf = (unsigned long) *buf;
903 	unsigned long hdr;
904 
905 	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
906 		kmsg->controllen;
907 	if (*len < hdr)
908 		return -EFAULT;
909 
910 	if (kmsg->controllen) {
911 		unsigned long control = ubuf + hdr - kmsg->controllen;
912 
913 		kmsg->msg.msg_control_user = (void __user *) control;
914 		kmsg->msg.msg_controllen = kmsg->controllen;
915 	}
916 
917 	sr->buf = *buf; /* stash for later copy */
918 	*buf = (void __user *) (ubuf + hdr);
919 	kmsg->payloadlen = *len = *len - hdr;
920 	return 0;
921 }
922 
923 struct io_recvmsg_multishot_hdr {
924 	struct io_uring_recvmsg_out msg;
925 	struct sockaddr_storage addr;
926 };
927 
928 static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
929 				struct io_async_msghdr *kmsg,
930 				unsigned int flags, bool *finished)
931 {
932 	int err;
933 	int copy_len;
934 	struct io_recvmsg_multishot_hdr hdr;
935 
936 	if (kmsg->namelen)
937 		kmsg->msg.msg_name = &hdr.addr;
938 	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
939 	kmsg->msg.msg_namelen = 0;
940 
941 	if (sock->file->f_flags & O_NONBLOCK)
942 		flags |= MSG_DONTWAIT;
943 
944 	err = sock_recvmsg(sock, &kmsg->msg, flags);
945 	*finished = err <= 0;
946 	if (err < 0)
947 		return err;
948 
949 	hdr.msg = (struct io_uring_recvmsg_out) {
950 		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
951 		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
952 	};
953 
954 	hdr.msg.payloadlen = err;
955 	if (err > kmsg->payloadlen)
956 		err = kmsg->payloadlen;
957 
958 	copy_len = sizeof(struct io_uring_recvmsg_out);
959 	if (kmsg->msg.msg_namelen > kmsg->namelen)
960 		copy_len += kmsg->namelen;
961 	else
962 		copy_len += kmsg->msg.msg_namelen;
963 
964 	/*
965 	 *      "fromlen shall refer to the value before truncation.."
966 	 *                      1003.1g
967 	 */
968 	hdr.msg.namelen = kmsg->msg.msg_namelen;
969 
970 	/* ensure that there is no gap between hdr and sockaddr_storage */
971 	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
972 		     sizeof(struct io_uring_recvmsg_out));
973 	if (copy_to_user(io->buf, &hdr, copy_len)) {
974 		*finished = true;
975 		return -EFAULT;
976 	}
977 
978 	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
979 			kmsg->controllen + err;
980 }
981 
982 int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
983 {
984 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
985 	struct io_async_msghdr *kmsg = req->async_data;
986 	struct socket *sock;
987 	unsigned flags;
988 	int ret, min_ret = 0;
989 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
990 	bool mshot_finished = true;
991 
992 	sock = sock_from_file(req->file);
993 	if (unlikely(!sock))
994 		return -ENOTSOCK;
995 
996 	if (!(req->flags & REQ_F_POLLED) &&
997 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
998 		return -EAGAIN;
999 
1000 	flags = sr->msg_flags;
1001 	if (force_nonblock)
1002 		flags |= MSG_DONTWAIT;
1003 
1004 retry_multishot:
1005 	if (io_do_buffer_select(req)) {
1006 		void __user *buf;
1007 		size_t len = sr->len;
1008 
1009 		buf = io_buffer_select(req, &len, issue_flags);
1010 		if (!buf)
1011 			return -ENOBUFS;
1012 
1013 		if (req->flags & REQ_F_APOLL_MULTISHOT) {
1014 			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
1015 			if (ret) {
1016 				io_kbuf_recycle(req, issue_flags);
1017 				return ret;
1018 			}
1019 		}
1020 
1021 		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
1022 	}
1023 
1024 	kmsg->msg.msg_get_inq = 1;
1025 	kmsg->msg.msg_inq = -1;
1026 	if (req->flags & REQ_F_APOLL_MULTISHOT) {
1027 		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
1028 					   &mshot_finished);
1029 	} else {
1030 		/* disable partial retry for recvmsg with cmsg attached */
1031 		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
1032 			min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1033 
1034 		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
1035 					 kmsg->uaddr, flags);
1036 	}
1037 
1038 	if (ret < min_ret) {
1039 		if (ret == -EAGAIN && force_nonblock) {
1040 			if (issue_flags & IO_URING_F_MULTISHOT) {
1041 				io_kbuf_recycle(req, issue_flags);
1042 				return IOU_ISSUE_SKIP_COMPLETE;
1043 			}
1044 			return -EAGAIN;
1045 		}
1046 		if (ret > 0 && io_net_retry(sock, flags)) {
1047 			sr->done_io += ret;
1048 			req->flags |= REQ_F_BL_NO_RECYCLE;
1049 			return -EAGAIN;
1050 		}
1051 		if (ret == -ERESTARTSYS)
1052 			ret = -EINTR;
1053 		req_set_fail(req);
1054 	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1055 		req_set_fail(req);
1056 	}
1057 
1058 	if (ret > 0)
1059 		ret += sr->done_io;
1060 	else if (sr->done_io)
1061 		ret = sr->done_io;
1062 	else
1063 		io_kbuf_recycle(req, issue_flags);
1064 
1065 	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
1066 		goto retry_multishot;
1067 
1068 	return ret;
1069 }
1070 
1071 static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
1072 			      size_t *len, unsigned int issue_flags)
1073 {
1074 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1075 	int ret;
1076 
1077 	/*
1078 	 * If the ring isn't locked, then don't use the peek interface
1079 	 * to grab multiple buffers as we will lock/unlock between
1080 	 * this selection and posting the buffers.
1081 	 */
1082 	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
1083 	    sr->flags & IORING_RECVSEND_BUNDLE) {
1084 		struct buf_sel_arg arg = {
1085 			.iovs = &kmsg->fast_iov,
1086 			.nr_iovs = 1,
1087 			.mode = KBUF_MODE_EXPAND,
1088 		};
1089 
1090 		if (kmsg->free_iov) {
1091 			arg.nr_iovs = kmsg->free_iov_nr;
1092 			arg.iovs = kmsg->free_iov;
1093 			arg.mode |= KBUF_MODE_FREE;
1094 		}
1095 
1096 		if (kmsg->msg.msg_inq > 0)
1097 			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
1098 
1099 		ret = io_buffers_peek(req, &arg);
1100 		if (unlikely(ret < 0))
1101 			return ret;
1102 
1103 		/* special case 1 vec, can be a fast path */
1104 		if (ret == 1) {
1105 			sr->buf = arg.iovs[0].iov_base;
1106 			sr->len = arg.iovs[0].iov_len;
1107 			goto map_ubuf;
1108 		}
1109 		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
1110 				arg.out_len);
1111 		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
1112 			kmsg->free_iov_nr = ret;
1113 			kmsg->free_iov = arg.iovs;
1114 			req->flags |= REQ_F_NEED_CLEANUP;
1115 		}
1116 	} else {
1117 		void __user *buf;
1118 
1119 		*len = sr->len;
1120 		buf = io_buffer_select(req, len, issue_flags);
1121 		if (!buf)
1122 			return -ENOBUFS;
1123 		sr->buf = buf;
1124 		sr->len = *len;
1125 map_ubuf:
1126 		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
1127 				  &kmsg->msg.msg_iter);
1128 		if (unlikely(ret))
1129 			return ret;
1130 	}
1131 
1132 	return 0;
1133 }
1134 
1135 int io_recv(struct io_kiocb *req, unsigned int issue_flags)
1136 {
1137 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1138 	struct io_async_msghdr *kmsg = req->async_data;
1139 	struct socket *sock;
1140 	unsigned flags;
1141 	int ret, min_ret = 0;
1142 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1143 	size_t len = sr->len;
1144 	bool mshot_finished;
1145 
1146 	if (!(req->flags & REQ_F_POLLED) &&
1147 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1148 		return -EAGAIN;
1149 
1150 	sock = sock_from_file(req->file);
1151 	if (unlikely(!sock))
1152 		return -ENOTSOCK;
1153 
1154 	flags = sr->msg_flags;
1155 	if (force_nonblock)
1156 		flags |= MSG_DONTWAIT;
1157 
1158 retry_multishot:
1159 	if (io_do_buffer_select(req)) {
1160 		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
1161 		if (unlikely(ret)) {
1162 			kmsg->msg.msg_inq = -1;
1163 			goto out_free;
1164 		}
1165 		sr->buf = NULL;
1166 	}
1167 
1168 	kmsg->msg.msg_flags = 0;
1169 	kmsg->msg.msg_inq = -1;
1170 
1171 	if (flags & MSG_WAITALL)
1172 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1173 
1174 	ret = sock_recvmsg(sock, &kmsg->msg, flags);
1175 	if (ret < min_ret) {
1176 		if (ret == -EAGAIN && force_nonblock) {
1177 			if (issue_flags & IO_URING_F_MULTISHOT) {
1178 				io_kbuf_recycle(req, issue_flags);
1179 				return IOU_ISSUE_SKIP_COMPLETE;
1180 			}
1181 
1182 			return -EAGAIN;
1183 		}
1184 		if (ret > 0 && io_net_retry(sock, flags)) {
1185 			sr->len -= ret;
1186 			sr->buf += ret;
1187 			sr->done_io += ret;
1188 			req->flags |= REQ_F_BL_NO_RECYCLE;
1189 			return -EAGAIN;
1190 		}
1191 		if (ret == -ERESTARTSYS)
1192 			ret = -EINTR;
1193 		req_set_fail(req);
1194 	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1195 out_free:
1196 		req_set_fail(req);
1197 	}
1198 
1199 	mshot_finished = ret <= 0;
1200 	if (ret > 0)
1201 		ret += sr->done_io;
1202 	else if (sr->done_io)
1203 		ret = sr->done_io;
1204 	else
1205 		io_kbuf_recycle(req, issue_flags);
1206 
1207 	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
1208 		goto retry_multishot;
1209 
1210 	return ret;
1211 }
1212 
1213 void io_send_zc_cleanup(struct io_kiocb *req)
1214 {
1215 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1216 	struct io_async_msghdr *io = req->async_data;
1217 
1218 	if (req_has_async_data(req))
1219 		io_netmsg_iovec_free(io);
1220 	if (zc->notif) {
1221 		io_notif_flush(zc->notif);
1222 		zc->notif = NULL;
1223 	}
1224 }
1225 
1226 #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
1227 #define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
1228 
1229 int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1230 {
1231 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1232 	struct io_ring_ctx *ctx = req->ctx;
1233 	struct io_kiocb *notif;
1234 
1235 	zc->done_io = 0;
1236 	req->flags |= REQ_F_POLL_NO_LAZY;
1237 
1238 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1239 		return -EINVAL;
1240 	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1241 	if (req->flags & REQ_F_CQE_SKIP)
1242 		return -EINVAL;
1243 
1244 	notif = zc->notif = io_alloc_notif(ctx);
1245 	if (!notif)
1246 		return -ENOMEM;
1247 	notif->cqe.user_data = req->cqe.user_data;
1248 	notif->cqe.res = 0;
1249 	notif->cqe.flags = IORING_CQE_F_NOTIF;
1250 	req->flags |= REQ_F_NEED_CLEANUP;
1251 
1252 	zc->flags = READ_ONCE(sqe->ioprio);
1253 	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1254 		if (zc->flags & ~IO_ZC_FLAGS_VALID)
1255 			return -EINVAL;
1256 		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1257 			struct io_notif_data *nd = io_notif_to_data(notif);
1258 
1259 			nd->zc_report = true;
1260 			nd->zc_used = false;
1261 			nd->zc_copied = false;
1262 		}
1263 	}
1264 
1265 	if (req->opcode != IORING_OP_SEND_ZC) {
1266 		if (unlikely(sqe->addr2 || sqe->file_index))
1267 			return -EINVAL;
1268 		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
1269 			return -EINVAL;
1270 	}
1271 
1272 	zc->len = READ_ONCE(sqe->len);
1273 	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
1274 	zc->buf_index = READ_ONCE(sqe->buf_index);
1275 	if (zc->msg_flags & MSG_DONTWAIT)
1276 		req->flags |= REQ_F_NOWAIT;
1277 
1278 #ifdef CONFIG_COMPAT
1279 	if (req->ctx->compat)
1280 		zc->msg_flags |= MSG_CMSG_COMPAT;
1281 #endif
1282 	if (unlikely(!io_msg_alloc_async(req)))
1283 		return -ENOMEM;
1284 	if (req->opcode != IORING_OP_SENDMSG_ZC)
1285 		return io_send_setup(req, sqe);
1286 	return io_sendmsg_setup(req, sqe);
1287 }
1288 
1289 static int io_sg_from_iter_iovec(struct sk_buff *skb,
1290 				 struct iov_iter *from, size_t length)
1291 {
1292 	skb_zcopy_downgrade_managed(skb);
1293 	return zerocopy_fill_skb_from_iter(skb, from, length);
1294 }
1295 
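/*
 * Fill skb frags straight from the bvec-backed iterator of a registered
 * buffer, attaching pages without taking extra references (managed frag
 * refs). If the skb already carries unmanaged frags, fall back to the
 * generic zerocopy fill helper.
 */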
1296 static int io_sg_from_iter(struct sk_buff *skb,
1297 			   struct iov_iter *from, size_t length)
1298 {
1299 	struct skb_shared_info *shinfo = skb_shinfo(skb);
1300 	int frag = shinfo->nr_frags;
1301 	int ret = 0;
1302 	struct bvec_iter bi;
1303 	ssize_t copied = 0;
1304 	unsigned long truesize = 0;
1305 
1306 	if (!frag)
1307 		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1308 	else if (unlikely(!skb_zcopy_managed(skb)))
1309 		return zerocopy_fill_skb_from_iter(skb, from, length);
1310 
1311 	bi.bi_size = min(from->count, length);
1312 	bi.bi_bvec_done = from->iov_offset;
1313 	bi.bi_idx = 0;
1314 
1315 	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1316 		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1317 
1318 		copied += v.bv_len;
1319 		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1320 		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1321 					   v.bv_offset, v.bv_len);
1322 		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1323 	}
1324 	if (bi.bi_size)
1325 		ret = -EMSGSIZE;
1326 
1327 	shinfo->nr_frags = frag;
1328 	from->bvec += bi.bi_idx;
1329 	from->nr_segs -= bi.bi_idx;
1330 	from->count -= copied;
1331 	from->iov_offset = bi.bi_bvec_done;
1332 
1333 	skb->data_len += copied;
1334 	skb->len += copied;
1335 	skb->truesize += truesize;
1336 	return ret;
1337 }
1338 
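/*
 * Map the data for a zerocopy send: either look up the registered (fixed)
 * buffer node and import from it, or import the plain user buffer and
 * account its memory against the notification.
 */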
1339 static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
1340 {
1341 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1342 	struct io_async_msghdr *kmsg = req->async_data;
1343 	int ret;
1344 
1345 	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
1346 		struct io_ring_ctx *ctx = req->ctx;
1347 		struct io_rsrc_node *node;
1348 
1349 		ret = -EFAULT;
1350 		io_ring_submit_lock(ctx, issue_flags);
1351 		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
1352 		if (node) {
1353 			io_req_assign_buf_node(sr->notif, node);
1354 			ret = 0;
1355 		}
1356 		io_ring_submit_unlock(ctx, issue_flags);
1357 
1358 		if (unlikely(ret))
1359 			return ret;
1360 
1361 		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
1362 					node->buf, (u64)(uintptr_t)sr->buf,
1363 					sr->len);
1364 		if (unlikely(ret))
1365 			return ret;
1366 		kmsg->msg.sg_from_iter = io_sg_from_iter;
1367 	} else {
1368 		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
1369 		if (unlikely(ret))
1370 			return ret;
1371 		ret = io_notif_account_mem(sr->notif, sr->len);
1372 		if (unlikely(ret))
1373 			return ret;
1374 		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1375 	}
1376 
1377 	return ret;
1378 }
1379 
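/*
 * Issue a zerocopy send. The request's completion CQE carries
 * IORING_CQE_F_MORE, and a second CQE with IORING_CQE_F_NOTIF is posted
 * once the kernel no longer needs the buffer.
 */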
1380 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1381 {
1382 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1383 	struct io_async_msghdr *kmsg = req->async_data;
1384 	struct socket *sock;
1385 	unsigned msg_flags;
1386 	int ret, min_ret = 0;
1387 
1388 	sock = sock_from_file(req->file);
1389 	if (unlikely(!sock))
1390 		return -ENOTSOCK;
1391 	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1392 		return -EOPNOTSUPP;
1393 
1394 	if (!(req->flags & REQ_F_POLLED) &&
1395 	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
1396 		return -EAGAIN;
1397 
1398 	if (!zc->done_io) {
1399 		ret = io_send_zc_import(req, issue_flags);
1400 		if (unlikely(ret))
1401 			return ret;
1402 	}
1403 
1404 	msg_flags = zc->msg_flags;
1405 	if (issue_flags & IO_URING_F_NONBLOCK)
1406 		msg_flags |= MSG_DONTWAIT;
1407 	if (msg_flags & MSG_WAITALL)
1408 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1409 	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1410 
1411 	kmsg->msg.msg_flags = msg_flags;
1412 	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1413 	ret = sock_sendmsg(sock, &kmsg->msg);
1414 
1415 	if (unlikely(ret < min_ret)) {
1416 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1417 			return -EAGAIN;
1418 
1419 		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
1420 			zc->len -= ret;
1421 			zc->buf += ret;
1422 			zc->done_io += ret;
1423 			req->flags |= REQ_F_BL_NO_RECYCLE;
1424 			return -EAGAIN;
1425 		}
1426 		if (ret == -ERESTARTSYS)
1427 			ret = -EINTR;
1428 		req_set_fail(req);
1429 	}
1430 
1431 	if (ret >= 0)
1432 		ret += zc->done_io;
1433 	else if (zc->done_io)
1434 		ret = zc->done_io;
1435 
1436 	/*
1437 	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1438 	 * flushing notif to io_send_zc_cleanup()
1439 	 */
1440 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1441 		io_notif_flush(zc->notif);
1442 		io_req_msg_cleanup(req, 0);
1443 	}
1444 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1445 	return IOU_OK;
1446 }
1447 
1448 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1449 {
1450 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1451 	struct io_async_msghdr *kmsg = req->async_data;
1452 	struct socket *sock;
1453 	unsigned flags;
1454 	int ret, min_ret = 0;
1455 
1456 	sock = sock_from_file(req->file);
1457 	if (unlikely(!sock))
1458 		return -ENOTSOCK;
1459 	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1460 		return -EOPNOTSUPP;
1461 
1462 	if (!(req->flags & REQ_F_POLLED) &&
1463 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1464 		return -EAGAIN;
1465 
1466 	flags = sr->msg_flags;
1467 	if (issue_flags & IO_URING_F_NONBLOCK)
1468 		flags |= MSG_DONTWAIT;
1469 	if (flags & MSG_WAITALL)
1470 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1471 
1472 	kmsg->msg.msg_control_user = sr->msg_control;
1473 	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1474 	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1475 	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1476 
1477 	if (unlikely(ret < min_ret)) {
1478 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1479 			return -EAGAIN;
1480 
1481 		if (ret > 0 && io_net_retry(sock, flags)) {
1482 			sr->done_io += ret;
1483 			req->flags |= REQ_F_BL_NO_RECYCLE;
1484 			return -EAGAIN;
1485 		}
1486 		if (ret == -ERESTARTSYS)
1487 			ret = -EINTR;
1488 		req_set_fail(req);
1489 	}
1490 
1491 	if (ret >= 0)
1492 		ret += sr->done_io;
1493 	else if (sr->done_io)
1494 		ret = sr->done_io;
1495 
1496 	/*
1497 	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1498 	 * flushing notif to io_send_zc_cleanup()
1499 	 */
1500 	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1501 		io_notif_flush(sr->notif);
1502 		io_req_msg_cleanup(req, 0);
1503 	}
1504 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1505 	return IOU_OK;
1506 }
1507 
1508 void io_sendrecv_fail(struct io_kiocb *req)
1509 {
1510 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1511 
1512 	if (sr->done_io)
1513 		req->cqe.res = sr->done_io;
1514 
1515 	if ((req->flags & REQ_F_NEED_CLEANUP) &&
1516 	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1517 		req->cqe.flags |= IORING_CQE_F_MORE;
1518 }
1519 
1520 #define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
1521 			 IORING_ACCEPT_POLL_FIRST)
1522 
1523 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1524 {
1525 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1526 
1527 	if (sqe->len || sqe->buf_index)
1528 		return -EINVAL;
1529 
1530 	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1531 	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1532 	accept->flags = READ_ONCE(sqe->accept_flags);
1533 	accept->nofile = rlimit(RLIMIT_NOFILE);
1534 	accept->iou_flags = READ_ONCE(sqe->ioprio);
1535 	if (accept->iou_flags & ~ACCEPT_FLAGS)
1536 		return -EINVAL;
1537 
1538 	accept->file_slot = READ_ONCE(sqe->file_index);
1539 	if (accept->file_slot) {
1540 		if (accept->flags & SOCK_CLOEXEC)
1541 			return -EINVAL;
1542 		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
1543 		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
1544 			return -EINVAL;
1545 	}
1546 	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1547 		return -EINVAL;
1548 	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1549 		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1550 	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
1551 		req->flags |= REQ_F_APOLL_MULTISHOT;
1552 	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
1553 		req->flags |= REQ_F_NOWAIT;
1554 	return 0;
1555 }
1556 
1557 int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1558 {
1559 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1560 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1561 	bool fixed = !!accept->file_slot;
1562 	struct proto_accept_arg arg = {
1563 		.flags = force_nonblock ? O_NONBLOCK : 0,
1564 	};
1565 	struct file *file;
1566 	unsigned cflags;
1567 	int ret, fd;
1568 
1569 	if (!(req->flags & REQ_F_POLLED) &&
1570 	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
1571 		return -EAGAIN;
1572 
1573 retry:
1574 	if (!fixed) {
1575 		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1576 		if (unlikely(fd < 0))
1577 			return fd;
1578 	}
1579 	arg.err = 0;
1580 	arg.is_empty = -1;
1581 	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
1582 			 accept->flags);
1583 	if (IS_ERR(file)) {
1584 		if (!fixed)
1585 			put_unused_fd(fd);
1586 		ret = PTR_ERR(file);
1587 		if (ret == -EAGAIN && force_nonblock &&
1588 		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
1589 			/*
1590 			 * if it's multishot and polled, we don't need to
1591 			 * return EAGAIN to arm the poll infra since it
1592 			 * has already been done
1593 			 */
1594 			if (issue_flags & IO_URING_F_MULTISHOT)
1595 				return IOU_ISSUE_SKIP_COMPLETE;
1596 			return ret;
1597 		}
1598 		if (ret == -ERESTARTSYS)
1599 			ret = -EINTR;
1600 		req_set_fail(req);
1601 	} else if (!fixed) {
1602 		fd_install(fd, file);
1603 		ret = fd;
1604 	} else {
1605 		ret = io_fixed_fd_install(req, issue_flags, file,
1606 						accept->file_slot);
1607 	}
1608 
1609 	cflags = 0;
1610 	if (!arg.is_empty)
1611 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
1612 
1613 	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1614 		io_req_set_res(req, ret, cflags);
1615 		return IOU_OK;
1616 	}
1617 
1618 	if (ret < 0)
1619 		return ret;
1620 	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
1621 		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
1622 			goto retry;
1623 		if (issue_flags & IO_URING_F_MULTISHOT)
1624 			return IOU_ISSUE_SKIP_COMPLETE;
1625 		return -EAGAIN;
1626 	}
1627 
1628 	io_req_set_res(req, ret, cflags);
1629 	return IOU_STOP_MULTISHOT;
1630 }
1631 
1632 int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1633 {
1634 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1635 
1636 	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1637 		return -EINVAL;
1638 
1639 	sock->domain = READ_ONCE(sqe->fd);
1640 	sock->type = READ_ONCE(sqe->off);
1641 	sock->protocol = READ_ONCE(sqe->len);
1642 	sock->file_slot = READ_ONCE(sqe->file_index);
1643 	sock->nofile = rlimit(RLIMIT_NOFILE);
1644 
1645 	sock->flags = sock->type & ~SOCK_TYPE_MASK;
1646 	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1647 		return -EINVAL;
1648 	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1649 		return -EINVAL;
1650 	return 0;
1651 }
1652 
1653 int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1654 {
1655 	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1656 	bool fixed = !!sock->file_slot;
1657 	struct file *file;
1658 	int ret, fd;
1659 
1660 	if (!fixed) {
1661 		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1662 		if (unlikely(fd < 0))
1663 			return fd;
1664 	}
1665 	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1666 	if (IS_ERR(file)) {
1667 		if (!fixed)
1668 			put_unused_fd(fd);
1669 		ret = PTR_ERR(file);
1670 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1671 			return -EAGAIN;
1672 		if (ret == -ERESTARTSYS)
1673 			ret = -EINTR;
1674 		req_set_fail(req);
1675 	} else if (!fixed) {
1676 		fd_install(fd, file);
1677 		ret = fd;
1678 	} else {
1679 		ret = io_fixed_fd_install(req, issue_flags, file,
1680 					    sock->file_slot);
1681 	}
1682 	io_req_set_res(req, ret, 0);
1683 	return IOU_OK;
1684 }
1685 
1686 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1687 {
1688 	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1689 	struct io_async_msghdr *io;
1690 
1691 	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1692 		return -EINVAL;
1693 
1694 	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1695 	conn->addr_len =  READ_ONCE(sqe->addr2);
1696 	conn->in_progress = conn->seen_econnaborted = false;
1697 
1698 	io = io_msg_alloc_async(req);
1699 	if (unlikely(!io))
1700 		return -ENOMEM;
1701 
1702 	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
1703 }
1704 
1705 int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1706 {
1707 	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1708 	struct io_async_msghdr *io = req->async_data;
1709 	unsigned file_flags;
1710 	int ret;
1711 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1712 
1713 	file_flags = force_nonblock ? O_NONBLOCK : 0;
1714 
1715 	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
1716 				 file_flags);
1717 	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1718 	    && force_nonblock) {
1719 		if (ret == -EINPROGRESS) {
1720 			connect->in_progress = true;
1721 		} else if (ret == -ECONNABORTED) {
1722 			if (connect->seen_econnaborted)
1723 				goto out;
1724 			connect->seen_econnaborted = true;
1725 		}
1726 		return -EAGAIN;
1727 	}
1728 	if (connect->in_progress) {
1729 		/*
1730 		 * At least bluetooth will return -EBADFD on a re-connect
1731 		 * attempt, and it's (supposedly) also valid to get -EISCONN
1732 		 * which means the previous result is good. For both of these,
1733 		 * grab the sock_error() and use that for the completion.
1734 		 */
1735 		if (ret == -EBADFD || ret == -EISCONN)
1736 			ret = sock_error(sock_from_file(req->file)->sk);
1737 	}
1738 	if (ret == -ERESTARTSYS)
1739 		ret = -EINTR;
1740 out:
1741 	if (ret < 0)
1742 		req_set_fail(req);
1743 	io_req_msg_cleanup(req, issue_flags);
1744 	io_req_set_res(req, ret, 0);
1745 	return IOU_OK;
1746 }
1747 
1748 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1749 {
1750 	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1751 	struct sockaddr __user *uaddr;
1752 	struct io_async_msghdr *io;
1753 
1754 	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1755 		return -EINVAL;
1756 
1757 	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1758 	bind->addr_len =  READ_ONCE(sqe->addr2);
1759 
1760 	io = io_msg_alloc_async(req);
1761 	if (unlikely(!io))
1762 		return -ENOMEM;
1763 	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
1764 }
1765 
1766 int io_bind(struct io_kiocb *req, unsigned int issue_flags)
1767 {
1768 	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1769 	struct io_async_msghdr *io = req->async_data;
1770 	struct socket *sock;
1771 	int ret;
1772 
1773 	sock = sock_from_file(req->file);
1774 	if (unlikely(!sock))
1775 		return -ENOTSOCK;
1776 
1777 	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
1778 	if (ret < 0)
1779 		req_set_fail(req);
1780 	io_req_set_res(req, ret, 0);
1781 	return 0;
1782 }
1783 
1784 int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1785 {
1786 	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1787 
1788 	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
1789 		return -EINVAL;
1790 
1791 	listen->backlog = READ_ONCE(sqe->len);
1792 	return 0;
1793 }
1794 
1795 int io_listen(struct io_kiocb *req, unsigned int issue_flags)
1796 {
1797 	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1798 	struct socket *sock;
1799 	int ret;
1800 
1801 	sock = sock_from_file(req->file);
1802 	if (unlikely(!sock))
1803 		return -ENOTSOCK;
1804 
1805 	ret = __sys_listen_socket(sock, listen->backlog);
1806 	if (ret < 0)
1807 		req_set_fail(req);
1808 	io_req_set_res(req, ret, 0);
1809 	return 0;
1810 }
1811 
1812 void io_netmsg_cache_free(const void *entry)
1813 {
1814 	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
1815 
1816 	if (kmsg->free_iov) {
1817 		kasan_mempool_unpoison_object(kmsg->free_iov,
1818 				kmsg->free_iov_nr * sizeof(struct iovec));
1819 		io_netmsg_iovec_free(kmsg);
1820 	}
1821 	kfree(kmsg);
1822 }
1823 #endif
1824