xref: /linux/net/unix/af_unix.c (revision e3617433c3da3d0859a4bc67f3f975e87f650ebf)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko Eißfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by the above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listening socket
32  *					has been reached. This won't break
33  *					old apps and it avoids a huge number
34  *					of hashed socks (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skbs queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  that start with a zero byte, so that this name space does not intersect
75  *		  with BSD names.
76  */
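/*
 * A minimal userspace sketch of the two binding flavours described above
 * (assumes <sys/socket.h>, <sys/un.h>, <string.h> and <stddef.h>; the path
 * "/tmp/example.sock" and the abstract name "\0example" are made up for
 * illustration):
 *
 *	int fs_fd  = socket(AF_UNIX, SOCK_STREAM, 0);
 *	int abs_fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	// Filesystem binding: NUL-terminated path, visible in the VFS.
 *	strcpy(a.sun_path, "/tmp/example.sock");
 *	bind(fs_fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract binding: sun_path starts with a zero byte; the name is
 *	// the bytes that follow, bounded by the passed address length.
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	memcpy(a.sun_path, "\0example", 8);
 *	bind(abs_fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */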
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 #include <linux/btf_ids.h>
117 
118 #include "scm.h"
119 
120 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
121 EXPORT_SYMBOL_GPL(unix_socket_table);
122 DEFINE_SPINLOCK(unix_table_lock);
123 EXPORT_SYMBOL_GPL(unix_table_lock);
124 static atomic_long_t unix_nr_socks;
125 
126 
127 static struct hlist_head *unix_sockets_unbound(void *addr)
128 {
129 	unsigned long hash = (unsigned long)addr;
130 
131 	hash ^= hash >> 16;
132 	hash ^= hash >> 8;
133 	hash %= UNIX_HASH_SIZE;
134 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
135 }
136 
137 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
138 
139 #ifdef CONFIG_SECURITY_NETWORK
140 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 {
142 	UNIXCB(skb).secid = scm->secid;
143 }
144 
145 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
146 {
147 	scm->secid = UNIXCB(skb).secid;
148 }
149 
150 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
151 {
152 	return (scm->secid == UNIXCB(skb).secid);
153 }
154 #else
155 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
156 { }
157 
158 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
159 { }
160 
161 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
162 {
163 	return true;
164 }
165 #endif /* CONFIG_SECURITY_NETWORK */
166 
167 /*
168  *  SMP locking strategy:
169  *    hash table is protected with spinlock unix_table_lock
170  *    each socket state is protected by separate spin lock.
171  */
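/*
 * A sketch of the double-lock convention used when two socket states must
 * be held at once (see unix_state_double_lock() and copy_peercred() below):
 * the lower-addressed sock is locked first and the second lock is taken
 * with the _nested() variant so lockdep accepts the ordering.
 *
 *	if (sk1 < sk2) {
 *		unix_state_lock(sk1);
 *		unix_state_lock_nested(sk2);
 *	} else {
 *		unix_state_lock(sk2);
 *		unix_state_lock_nested(sk1);
 *	}
 */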
172 
173 static inline unsigned int unix_hash_fold(__wsum n)
174 {
175 	unsigned int hash = (__force unsigned int)csum_fold(n);
176 
177 	hash ^= hash>>8;
178 	return hash&(UNIX_HASH_SIZE-1);
179 }
180 
181 #define unix_peer(sk) (unix_sk(sk)->peer)
182 
183 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
184 {
185 	return unix_peer(osk) == sk;
186 }
187 
188 static inline int unix_may_send(struct sock *sk, struct sock *osk)
189 {
190 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
191 }
192 
193 static inline int unix_recvq_full(const struct sock *sk)
194 {
195 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
196 }
197 
198 static inline int unix_recvq_full_lockless(const struct sock *sk)
199 {
200 	return skb_queue_len_lockless(&sk->sk_receive_queue) >
201 		READ_ONCE(sk->sk_max_ack_backlog);
202 }
203 
204 struct sock *unix_peer_get(struct sock *s)
205 {
206 	struct sock *peer;
207 
208 	unix_state_lock(s);
209 	peer = unix_peer(s);
210 	if (peer)
211 		sock_hold(peer);
212 	unix_state_unlock(s);
213 	return peer;
214 }
215 EXPORT_SYMBOL_GPL(unix_peer_get);
216 
217 static inline void unix_release_addr(struct unix_address *addr)
218 {
219 	if (refcount_dec_and_test(&addr->refcnt))
220 		kfree(addr);
221 }
222 
223 /*
224  *	Check unix socket name:
225  *		- it should not be zero length.
226  *		- if it does not start with a zero byte, it should be NUL terminated (FS object)
227  *		- if it starts with a zero byte, it is an abstract name.
228  */
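/*
 * Worked example (informal): for a caller passing sun_path = "/tmp/x" with
 * its terminating NUL (len == sizeof(short) + 7), the address is
 * NUL-terminated in place and strlen("/tmp/x") + 1 + sizeof(short) is
 * returned with *hashp left at 0.  For an abstract name the passed len is
 * returned unchanged and *hashp is derived from a checksum of the whole
 * address via unix_hash_fold().
 */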
229 
230 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
231 {
232 	*hashp = 0;
233 
234 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
235 		return -EINVAL;
236 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
237 		return -EINVAL;
238 	if (sunaddr->sun_path[0]) {
239 		/*
240 		 * This may look like an off by one error but it is a bit more
241 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
242 		 * sun_path[108] doesn't as such exist.  However in kernel space
243 		 * sun_path[108] doesn't exist as such.  However, in kernel space
244 		 * kernel address buffer.
245 		 */
246 		((char *)sunaddr)[len] = 0;
247 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
248 		return len;
249 	}
250 
251 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
252 	return len;
253 }
254 
255 static void __unix_remove_socket(struct sock *sk)
256 {
257 	sk_del_node_init(sk);
258 }
259 
260 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
261 {
262 	WARN_ON(!sk_unhashed(sk));
263 	sk_add_node(sk, list);
264 }
265 
266 static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
267 			    unsigned hash)
268 {
269 	__unix_remove_socket(sk);
270 	smp_store_release(&unix_sk(sk)->addr, addr);
271 	__unix_insert_socket(&unix_socket_table[hash], sk);
272 }
273 
274 static inline void unix_remove_socket(struct sock *sk)
275 {
276 	spin_lock(&unix_table_lock);
277 	__unix_remove_socket(sk);
278 	spin_unlock(&unix_table_lock);
279 }
280 
281 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
282 {
283 	spin_lock(&unix_table_lock);
284 	__unix_insert_socket(list, sk);
285 	spin_unlock(&unix_table_lock);
286 }
287 
288 static struct sock *__unix_find_socket_byname(struct net *net,
289 					      struct sockaddr_un *sunname,
290 					      int len, unsigned int hash)
291 {
292 	struct sock *s;
293 
294 	sk_for_each(s, &unix_socket_table[hash]) {
295 		struct unix_sock *u = unix_sk(s);
296 
297 		if (!net_eq(sock_net(s), net))
298 			continue;
299 
300 		if (u->addr->len == len &&
301 		    !memcmp(u->addr->name, sunname, len))
302 			return s;
303 	}
304 	return NULL;
305 }
306 
307 static inline struct sock *unix_find_socket_byname(struct net *net,
308 						   struct sockaddr_un *sunname,
309 						   int len, unsigned int hash)
310 {
311 	struct sock *s;
312 
313 	spin_lock(&unix_table_lock);
314 	s = __unix_find_socket_byname(net, sunname, len, hash);
315 	if (s)
316 		sock_hold(s);
317 	spin_unlock(&unix_table_lock);
318 	return s;
319 }
320 
321 static struct sock *unix_find_socket_byinode(struct inode *i)
322 {
323 	struct sock *s;
324 
325 	spin_lock(&unix_table_lock);
326 	sk_for_each(s,
327 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
328 		struct dentry *dentry = unix_sk(s)->path.dentry;
329 
330 		if (dentry && d_backing_inode(dentry) == i) {
331 			sock_hold(s);
332 			goto found;
333 		}
334 	}
335 	s = NULL;
336 found:
337 	spin_unlock(&unix_table_lock);
338 	return s;
339 }
340 
341 /* Support code for asymmetrically connected dgram sockets
342  *
343  * If a datagram socket is connected to a socket not itself connected
344  * to the first socket (eg, /dev/log), clients may only enqueue more
345  * messages if the present receive queue of the server socket is not
346  * "too large". This means there's a second writeability condition
347  * poll and sendmsg need to test. The dgram recv code will do a wake
348  * up on the peer_wait wait queue of a socket upon reception of a
349  * datagram which needs to be propagated to sleeping would-be writers
350  * since these might not have sent anything so far. This can't be
351  * accomplished via poll_wait because the lifetime of the server
352  * socket might be less than that of its clients if these break their
353  * association with it or if the server socket is closed while clients
354  * are still connected to it and there's no way to inform "a polling
355  * implementation" that it should let go of a certain wait queue.
356  *
357  * In order to propagate a wake up, a wait_queue_entry_t of the client
358  * socket is enqueued on the peer_wait queue of the server socket
359  * whose wake function does a wake_up on the ordinary client socket
360  * wait queue. This connection is established whenever a write (or
361  * poll for write) hits the flow control condition and is broken when the
362  * association to the server socket is dissolved or after a wake up
363  * has been relayed.
364  */
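/*
 * Rough lifecycle sketch of this relay (informal; C is a client datagram
 * socket connected to a server socket S such as a logger):
 *
 *	sendmsg(C) finds S's receive queue full
 *	  -> unix_dgram_peer_wake_me(C, S)
 *	       -> unix_dgram_peer_wake_connect(C, S)
 *		  (hooks C's peer_wake entry onto S's peer_wait queue)
 *
 *	recvmsg(S) dequeues a datagram and wakes S's peer_wait queue
 *	  -> unix_dgram_peer_wake_relay()
 *	     (unhooks C's entry and wakes C's own socket wait queue, so a
 *	      sleeping writer or poller on C sees EPOLLOUT again)
 */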
365 
366 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
367 				      void *key)
368 {
369 	struct unix_sock *u;
370 	wait_queue_head_t *u_sleep;
371 
372 	u = container_of(q, struct unix_sock, peer_wake);
373 
374 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
375 			    q);
376 	u->peer_wake.private = NULL;
377 
378 	/* relaying can only happen while the wq still exists */
379 	u_sleep = sk_sleep(&u->sk);
380 	if (u_sleep)
381 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
382 
383 	return 0;
384 }
385 
386 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
387 {
388 	struct unix_sock *u, *u_other;
389 	int rc;
390 
391 	u = unix_sk(sk);
392 	u_other = unix_sk(other);
393 	rc = 0;
394 	spin_lock(&u_other->peer_wait.lock);
395 
396 	if (!u->peer_wake.private) {
397 		u->peer_wake.private = other;
398 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
399 
400 		rc = 1;
401 	}
402 
403 	spin_unlock(&u_other->peer_wait.lock);
404 	return rc;
405 }
406 
407 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
408 					    struct sock *other)
409 {
410 	struct unix_sock *u, *u_other;
411 
412 	u = unix_sk(sk);
413 	u_other = unix_sk(other);
414 	spin_lock(&u_other->peer_wait.lock);
415 
416 	if (u->peer_wake.private == other) {
417 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
418 		u->peer_wake.private = NULL;
419 	}
420 
421 	spin_unlock(&u_other->peer_wait.lock);
422 }
423 
424 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
425 						   struct sock *other)
426 {
427 	unix_dgram_peer_wake_disconnect(sk, other);
428 	wake_up_interruptible_poll(sk_sleep(sk),
429 				   EPOLLOUT |
430 				   EPOLLWRNORM |
431 				   EPOLLWRBAND);
432 }
433 
434 /* preconditions:
435  *	- unix_peer(sk) == other
436  *	- association is stable
437  */
438 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
439 {
440 	int connected;
441 
442 	connected = unix_dgram_peer_wake_connect(sk, other);
443 
444 	/* If other is SOCK_DEAD, we want to make sure we signal
445 	 * POLLOUT, such that a subsequent write() can get a
446 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
447 	 * to other and it's full, we will hang waiting for POLLOUT.
448 	 */
449 	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
450 		return 1;
451 
452 	if (connected)
453 		unix_dgram_peer_wake_disconnect(sk, other);
454 
455 	return 0;
456 }
457 
458 static int unix_writable(const struct sock *sk)
459 {
460 	return sk->sk_state != TCP_LISTEN &&
461 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
462 }
463 
464 static void unix_write_space(struct sock *sk)
465 {
466 	struct socket_wq *wq;
467 
468 	rcu_read_lock();
469 	if (unix_writable(sk)) {
470 		wq = rcu_dereference(sk->sk_wq);
471 		if (skwq_has_sleeper(wq))
472 			wake_up_interruptible_sync_poll(&wq->wait,
473 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
474 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
475 	}
476 	rcu_read_unlock();
477 }
478 
479 /* When a dgram socket disconnects (or changes its peer), we clear its receive
480  * queue of packets that arrived from the previous peer. First, this allows
481  * flow control based only on wmem_alloc; second, an sk connected to a peer
482  * may receive messages only from that peer. */
483 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
484 {
485 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
486 		skb_queue_purge(&sk->sk_receive_queue);
487 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
488 
489 		/* If one link of a bidirectional dgram pipe is disconnected,
490 		 * we signal an error. Messages are lost. Do not do this
491 		 * when the peer was not connected to us.
492 		 */
493 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
494 			other->sk_err = ECONNRESET;
495 			sk_error_report(other);
496 		}
497 	}
498 	other->sk_state = TCP_CLOSE;
499 }
500 
501 static void unix_sock_destructor(struct sock *sk)
502 {
503 	struct unix_sock *u = unix_sk(sk);
504 
505 	skb_queue_purge(&sk->sk_receive_queue);
506 
507 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
508 	if (u->oob_skb) {
509 		kfree_skb(u->oob_skb);
510 		u->oob_skb = NULL;
511 	}
512 #endif
513 	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
514 	WARN_ON(!sk_unhashed(sk));
515 	WARN_ON(sk->sk_socket);
516 	if (!sock_flag(sk, SOCK_DEAD)) {
517 		pr_info("Attempt to release alive unix socket: %p\n", sk);
518 		return;
519 	}
520 
521 	if (u->addr)
522 		unix_release_addr(u->addr);
523 
524 	atomic_long_dec(&unix_nr_socks);
525 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
526 #ifdef UNIX_REFCNT_DEBUG
527 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
528 		atomic_long_read(&unix_nr_socks));
529 #endif
530 }
531 
532 static void unix_release_sock(struct sock *sk, int embrion)
533 {
534 	struct unix_sock *u = unix_sk(sk);
535 	struct path path;
536 	struct sock *skpair;
537 	struct sk_buff *skb;
538 	int state;
539 
540 	unix_remove_socket(sk);
541 
542 	/* Clear state */
543 	unix_state_lock(sk);
544 	sock_orphan(sk);
545 	sk->sk_shutdown = SHUTDOWN_MASK;
546 	path	     = u->path;
547 	u->path.dentry = NULL;
548 	u->path.mnt = NULL;
549 	state = sk->sk_state;
550 	sk->sk_state = TCP_CLOSE;
551 
552 	skpair = unix_peer(sk);
553 	unix_peer(sk) = NULL;
554 
555 	unix_state_unlock(sk);
556 
557 	wake_up_interruptible_all(&u->peer_wait);
558 
559 	if (skpair != NULL) {
560 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
561 			unix_state_lock(skpair);
562 			/* No more writes */
563 			skpair->sk_shutdown = SHUTDOWN_MASK;
564 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
565 				skpair->sk_err = ECONNRESET;
566 			unix_state_unlock(skpair);
567 			skpair->sk_state_change(skpair);
568 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
569 		}
570 
571 		unix_dgram_peer_wake_disconnect(sk, skpair);
572 		sock_put(skpair); /* It may now die */
573 	}
574 
575 	/* Try to flush out this socket. Throw out buffers at least */
576 
577 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
578 		if (state == TCP_LISTEN)
579 			unix_release_sock(skb->sk, 1);
580 		/* passed fds are erased in the kfree_skb hook	      */
581 		UNIXCB(skb).consumed = skb->len;
582 		kfree_skb(skb);
583 	}
584 
585 	if (path.dentry)
586 		path_put(&path);
587 
588 	sock_put(sk);
589 
590 	/* ---- Socket is dead now and most probably destroyed ---- */
591 
592 	/*
593 	 * Fixme: BSD difference: In BSD all sockets connected to us get
594 	 *	  ECONNRESET and we die on the spot. In Linux we behave
595 	 *	  like files and pipes do and wait for the last
596 	 *	  dereference.
597 	 *
598 	 * Can't we simply set sock->err?
599 	 *
600 	 *	  What does the above comment talk about? --ANK(980817)
601 	 */
602 
603 	if (unix_tot_inflight)
604 		unix_gc();		/* Garbage collect fds */
605 }
606 
607 static void init_peercred(struct sock *sk)
608 {
609 	const struct cred *old_cred;
610 	struct pid *old_pid;
611 
612 	spin_lock(&sk->sk_peer_lock);
613 	old_pid = sk->sk_peer_pid;
614 	old_cred = sk->sk_peer_cred;
615 	sk->sk_peer_pid  = get_pid(task_tgid(current));
616 	sk->sk_peer_cred = get_current_cred();
617 	spin_unlock(&sk->sk_peer_lock);
618 
619 	put_pid(old_pid);
620 	put_cred(old_cred);
621 }
622 
623 static void copy_peercred(struct sock *sk, struct sock *peersk)
624 {
625 	const struct cred *old_cred;
626 	struct pid *old_pid;
627 
628 	if (sk < peersk) {
629 		spin_lock(&sk->sk_peer_lock);
630 		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
631 	} else {
632 		spin_lock(&peersk->sk_peer_lock);
633 		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
634 	}
635 	old_pid = sk->sk_peer_pid;
636 	old_cred = sk->sk_peer_cred;
637 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
638 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
639 
640 	spin_unlock(&sk->sk_peer_lock);
641 	spin_unlock(&peersk->sk_peer_lock);
642 
643 	put_pid(old_pid);
644 	put_cred(old_cred);
645 }
646 
647 static int unix_listen(struct socket *sock, int backlog)
648 {
649 	int err;
650 	struct sock *sk = sock->sk;
651 	struct unix_sock *u = unix_sk(sk);
652 
653 	err = -EOPNOTSUPP;
654 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
655 		goto out;	/* Only stream/seqpacket sockets accept */
656 	err = -EINVAL;
657 	if (!u->addr)
658 		goto out;	/* No listens on an unbound socket */
659 	unix_state_lock(sk);
660 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
661 		goto out_unlock;
662 	if (backlog > sk->sk_max_ack_backlog)
663 		wake_up_interruptible_all(&u->peer_wait);
664 	sk->sk_max_ack_backlog	= backlog;
665 	sk->sk_state		= TCP_LISTEN;
666 	/* set credentials so connect can copy them */
667 	init_peercred(sk);
668 	err = 0;
669 
670 out_unlock:
671 	unix_state_unlock(sk);
672 out:
673 	return err;
674 }
675 
676 static int unix_release(struct socket *);
677 static int unix_bind(struct socket *, struct sockaddr *, int);
678 static int unix_stream_connect(struct socket *, struct sockaddr *,
679 			       int addr_len, int flags);
680 static int unix_socketpair(struct socket *, struct socket *);
681 static int unix_accept(struct socket *, struct socket *, int, bool);
682 static int unix_getname(struct socket *, struct sockaddr *, int);
683 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
684 static __poll_t unix_dgram_poll(struct file *, struct socket *,
685 				    poll_table *);
686 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
687 #ifdef CONFIG_COMPAT
688 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
689 #endif
690 static int unix_shutdown(struct socket *, int);
691 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
692 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
693 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
694 				    size_t size, int flags);
695 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
696 				       struct pipe_inode_info *, size_t size,
697 				       unsigned int flags);
698 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
699 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
700 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
701 			  sk_read_actor_t recv_actor);
702 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
703 				 sk_read_actor_t recv_actor);
704 static int unix_dgram_connect(struct socket *, struct sockaddr *,
705 			      int, int);
706 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
707 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
708 				  int);
709 
710 static int unix_set_peek_off(struct sock *sk, int val)
711 {
712 	struct unix_sock *u = unix_sk(sk);
713 
714 	if (mutex_lock_interruptible(&u->iolock))
715 		return -EINTR;
716 
717 	sk->sk_peek_off = val;
718 	mutex_unlock(&u->iolock);
719 
720 	return 0;
721 }
722 
723 #ifdef CONFIG_PROC_FS
724 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
725 {
726 	struct sock *sk = sock->sk;
727 	struct unix_sock *u;
728 
729 	if (sk) {
730 		u = unix_sk(sock->sk);
731 		seq_printf(m, "scm_fds: %u\n",
732 			   atomic_read(&u->scm_stat.nr_fds));
733 	}
734 }
735 #else
736 #define unix_show_fdinfo NULL
737 #endif
738 
739 static const struct proto_ops unix_stream_ops = {
740 	.family =	PF_UNIX,
741 	.owner =	THIS_MODULE,
742 	.release =	unix_release,
743 	.bind =		unix_bind,
744 	.connect =	unix_stream_connect,
745 	.socketpair =	unix_socketpair,
746 	.accept =	unix_accept,
747 	.getname =	unix_getname,
748 	.poll =		unix_poll,
749 	.ioctl =	unix_ioctl,
750 #ifdef CONFIG_COMPAT
751 	.compat_ioctl =	unix_compat_ioctl,
752 #endif
753 	.listen =	unix_listen,
754 	.shutdown =	unix_shutdown,
755 	.sendmsg =	unix_stream_sendmsg,
756 	.recvmsg =	unix_stream_recvmsg,
757 	.read_sock =	unix_stream_read_sock,
758 	.mmap =		sock_no_mmap,
759 	.sendpage =	unix_stream_sendpage,
760 	.splice_read =	unix_stream_splice_read,
761 	.set_peek_off =	unix_set_peek_off,
762 	.show_fdinfo =	unix_show_fdinfo,
763 };
764 
765 static const struct proto_ops unix_dgram_ops = {
766 	.family =	PF_UNIX,
767 	.owner =	THIS_MODULE,
768 	.release =	unix_release,
769 	.bind =		unix_bind,
770 	.connect =	unix_dgram_connect,
771 	.socketpair =	unix_socketpair,
772 	.accept =	sock_no_accept,
773 	.getname =	unix_getname,
774 	.poll =		unix_dgram_poll,
775 	.ioctl =	unix_ioctl,
776 #ifdef CONFIG_COMPAT
777 	.compat_ioctl =	unix_compat_ioctl,
778 #endif
779 	.listen =	sock_no_listen,
780 	.shutdown =	unix_shutdown,
781 	.sendmsg =	unix_dgram_sendmsg,
782 	.read_sock =	unix_read_sock,
783 	.recvmsg =	unix_dgram_recvmsg,
784 	.mmap =		sock_no_mmap,
785 	.sendpage =	sock_no_sendpage,
786 	.set_peek_off =	unix_set_peek_off,
787 	.show_fdinfo =	unix_show_fdinfo,
788 };
789 
790 static const struct proto_ops unix_seqpacket_ops = {
791 	.family =	PF_UNIX,
792 	.owner =	THIS_MODULE,
793 	.release =	unix_release,
794 	.bind =		unix_bind,
795 	.connect =	unix_stream_connect,
796 	.socketpair =	unix_socketpair,
797 	.accept =	unix_accept,
798 	.getname =	unix_getname,
799 	.poll =		unix_dgram_poll,
800 	.ioctl =	unix_ioctl,
801 #ifdef CONFIG_COMPAT
802 	.compat_ioctl =	unix_compat_ioctl,
803 #endif
804 	.listen =	unix_listen,
805 	.shutdown =	unix_shutdown,
806 	.sendmsg =	unix_seqpacket_sendmsg,
807 	.recvmsg =	unix_seqpacket_recvmsg,
808 	.mmap =		sock_no_mmap,
809 	.sendpage =	sock_no_sendpage,
810 	.set_peek_off =	unix_set_peek_off,
811 	.show_fdinfo =	unix_show_fdinfo,
812 };
813 
814 static void unix_close(struct sock *sk, long timeout)
815 {
816 	/* Nothing to do here, unix socket does not need a ->close().
817 	 * This is merely for sockmap.
818 	 */
819 }
820 
821 static void unix_unhash(struct sock *sk)
822 {
823 	/* Nothing to do here, unix socket does not need a ->unhash().
824 	 * This is merely for sockmap.
825 	 */
826 }
827 
828 struct proto unix_dgram_proto = {
829 	.name			= "UNIX",
830 	.owner			= THIS_MODULE,
831 	.obj_size		= sizeof(struct unix_sock),
832 	.close			= unix_close,
833 #ifdef CONFIG_BPF_SYSCALL
834 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
835 #endif
836 };
837 
838 struct proto unix_stream_proto = {
839 	.name			= "UNIX-STREAM",
840 	.owner			= THIS_MODULE,
841 	.obj_size		= sizeof(struct unix_sock),
842 	.close			= unix_close,
843 	.unhash			= unix_unhash,
844 #ifdef CONFIG_BPF_SYSCALL
845 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
846 #endif
847 };
848 
849 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
850 {
851 	struct unix_sock *u;
852 	struct sock *sk;
853 	int err;
854 
855 	atomic_long_inc(&unix_nr_socks);
856 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
857 		err = -ENFILE;
858 		goto err;
859 	}
860 
861 	if (type == SOCK_STREAM)
862 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
863 	else /* dgram and seqpacket */
864 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
865 
866 	if (!sk) {
867 		err = -ENOMEM;
868 		goto err;
869 	}
870 
871 	sock_init_data(sock, sk);
872 
873 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
874 	sk->sk_write_space	= unix_write_space;
875 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
876 	sk->sk_destruct		= unix_sock_destructor;
877 	u	  = unix_sk(sk);
878 	u->path.dentry = NULL;
879 	u->path.mnt = NULL;
880 	spin_lock_init(&u->lock);
881 	atomic_long_set(&u->inflight, 0);
882 	INIT_LIST_HEAD(&u->link);
883 	mutex_init(&u->iolock); /* single task reading lock */
884 	mutex_init(&u->bindlock); /* single task binding lock */
885 	init_waitqueue_head(&u->peer_wait);
886 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
887 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
888 	unix_insert_socket(unix_sockets_unbound(sk), sk);
889 
890 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
891 
892 	return sk;
893 
894 err:
895 	atomic_long_dec(&unix_nr_socks);
896 	return ERR_PTR(err);
897 }
898 
899 static int unix_create(struct net *net, struct socket *sock, int protocol,
900 		       int kern)
901 {
902 	struct sock *sk;
903 
904 	if (protocol && protocol != PF_UNIX)
905 		return -EPROTONOSUPPORT;
906 
907 	sock->state = SS_UNCONNECTED;
908 
909 	switch (sock->type) {
910 	case SOCK_STREAM:
911 		sock->ops = &unix_stream_ops;
912 		break;
913 		/*
914 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
915 		 *	nothing uses it.
916 		 */
917 	case SOCK_RAW:
918 		sock->type = SOCK_DGRAM;
919 		fallthrough;
920 	case SOCK_DGRAM:
921 		sock->ops = &unix_dgram_ops;
922 		break;
923 	case SOCK_SEQPACKET:
924 		sock->ops = &unix_seqpacket_ops;
925 		break;
926 	default:
927 		return -ESOCKTNOSUPPORT;
928 	}
929 
930 	sk = unix_create1(net, sock, kern, sock->type);
931 	if (IS_ERR(sk))
932 		return PTR_ERR(sk);
933 
934 	return 0;
935 }
936 
937 static int unix_release(struct socket *sock)
938 {
939 	struct sock *sk = sock->sk;
940 
941 	if (!sk)
942 		return 0;
943 
944 	sk->sk_prot->close(sk, 0);
945 	unix_release_sock(sk, 0);
946 	sock->sk = NULL;
947 
948 	return 0;
949 }
950 
951 static int unix_autobind(struct socket *sock)
952 {
953 	struct sock *sk = sock->sk;
954 	struct net *net = sock_net(sk);
955 	struct unix_sock *u = unix_sk(sk);
956 	static u32 ordernum = 1;
957 	struct unix_address *addr;
958 	int err;
959 	unsigned int retries = 0;
960 
961 	err = mutex_lock_interruptible(&u->bindlock);
962 	if (err)
963 		return err;
964 
965 	if (u->addr)
966 		goto out;
967 
968 	err = -ENOMEM;
969 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
970 	if (!addr)
971 		goto out;
972 
973 	addr->name->sun_family = AF_UNIX;
974 	refcount_set(&addr->refcnt, 1);
975 
976 retry:
977 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
978 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
979 	addr->hash ^= sk->sk_type;
980 
981 	spin_lock(&unix_table_lock);
982 	ordernum = (ordernum+1)&0xFFFFF;
983 
984 	if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
985 		spin_unlock(&unix_table_lock);
986 		/*
987 		 * __unix_find_socket_byname() may take a long time if many names
988 		 * are already in use.
989 		 */
990 		cond_resched();
991 		/* Give up if all names seem to be in use. */
992 		if (retries++ == 0xFFFFF) {
993 			err = -ENOSPC;
994 			kfree(addr);
995 			goto out;
996 		}
997 		goto retry;
998 	}
999 
1000 	__unix_set_addr(sk, addr, addr->hash);
1001 	spin_unlock(&unix_table_lock);
1002 	err = 0;
1003 
1004 out:	mutex_unlock(&u->bindlock);
1005 	return err;
1006 }
1007 
1008 static struct sock *unix_find_other(struct net *net,
1009 				    struct sockaddr_un *sunname, int len,
1010 				    int type, unsigned int hash, int *error)
1011 {
1012 	struct sock *u;
1013 	struct path path;
1014 	int err = 0;
1015 
1016 	if (sunname->sun_path[0]) {
1017 		struct inode *inode;
1018 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1019 		if (err)
1020 			goto fail;
1021 		inode = d_backing_inode(path.dentry);
1022 		err = path_permission(&path, MAY_WRITE);
1023 		if (err)
1024 			goto put_fail;
1025 
1026 		err = -ECONNREFUSED;
1027 		if (!S_ISSOCK(inode->i_mode))
1028 			goto put_fail;
1029 		u = unix_find_socket_byinode(inode);
1030 		if (!u)
1031 			goto put_fail;
1032 
1033 		if (u->sk_type == type)
1034 			touch_atime(&path);
1035 
1036 		path_put(&path);
1037 
1038 		err = -EPROTOTYPE;
1039 		if (u->sk_type != type) {
1040 			sock_put(u);
1041 			goto fail;
1042 		}
1043 	} else {
1044 		err = -ECONNREFUSED;
1045 		u = unix_find_socket_byname(net, sunname, len, type ^ hash);
1046 		if (u) {
1047 			struct dentry *dentry;
1048 			dentry = unix_sk(u)->path.dentry;
1049 			if (dentry)
1050 				touch_atime(&unix_sk(u)->path);
1051 		} else
1052 			goto fail;
1053 	}
1054 	return u;
1055 
1056 put_fail:
1057 	path_put(&path);
1058 fail:
1059 	*error = err;
1060 	return NULL;
1061 }
1062 
1063 static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
1064 {
1065 	struct unix_sock *u = unix_sk(sk);
1066 	umode_t mode = S_IFSOCK |
1067 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1068 	struct user_namespace *ns; // barf...
1069 	struct path parent;
1070 	struct dentry *dentry;
1071 	unsigned int hash;
1072 	int err;
1073 
1074 	/*
1075 	 * Get the parent directory, calculate the hash for the last
1076 	 * component.
1077 	 */
1078 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1079 	if (IS_ERR(dentry))
1080 		return PTR_ERR(dentry);
1081 	ns = mnt_user_ns(parent.mnt);
1082 
1083 	/*
1084 	 * All right, let's create it.
1085 	 */
1086 	err = security_path_mknod(&parent, dentry, mode, 0);
1087 	if (!err)
1088 		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
1089 	if (err)
1090 		goto out;
1091 	err = mutex_lock_interruptible(&u->bindlock);
1092 	if (err)
1093 		goto out_unlink;
1094 	if (u->addr)
1095 		goto out_unlock;
1096 
1097 	addr->hash = UNIX_HASH_SIZE;
1098 	hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1099 	spin_lock(&unix_table_lock);
1100 	u->path.mnt = mntget(parent.mnt);
1101 	u->path.dentry = dget(dentry);
1102 	__unix_set_addr(sk, addr, hash);
1103 	spin_unlock(&unix_table_lock);
1104 	mutex_unlock(&u->bindlock);
1105 	done_path_create(&parent, dentry);
1106 	return 0;
1107 
1108 out_unlock:
1109 	mutex_unlock(&u->bindlock);
1110 	err = -EINVAL;
1111 out_unlink:
1112 	/* failed after successful mknod?  unlink what we'd created... */
1113 	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
1114 out:
1115 	done_path_create(&parent, dentry);
1116 	return err;
1117 }
1118 
1119 static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
1120 {
1121 	struct unix_sock *u = unix_sk(sk);
1122 	int err;
1123 
1124 	err = mutex_lock_interruptible(&u->bindlock);
1125 	if (err)
1126 		return err;
1127 
1128 	if (u->addr) {
1129 		mutex_unlock(&u->bindlock);
1130 		return -EINVAL;
1131 	}
1132 
1133 	spin_lock(&unix_table_lock);
1134 	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1135 				      addr->hash)) {
1136 		spin_unlock(&unix_table_lock);
1137 		mutex_unlock(&u->bindlock);
1138 		return -EADDRINUSE;
1139 	}
1140 	__unix_set_addr(sk, addr, addr->hash);
1141 	spin_unlock(&unix_table_lock);
1142 	mutex_unlock(&u->bindlock);
1143 	return 0;
1144 }
1145 
1146 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1147 {
1148 	struct sock *sk = sock->sk;
1149 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1150 	char *sun_path = sunaddr->sun_path;
1151 	int err;
1152 	unsigned int hash;
1153 	struct unix_address *addr;
1154 
1155 	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1156 	    sunaddr->sun_family != AF_UNIX)
1157 		return -EINVAL;
1158 
1159 	if (addr_len == sizeof(short))
1160 		return unix_autobind(sock);
1161 
1162 	err = unix_mkname(sunaddr, addr_len, &hash);
1163 	if (err < 0)
1164 		return err;
1165 	addr_len = err;
1166 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1167 	if (!addr)
1168 		return -ENOMEM;
1169 
1170 	memcpy(addr->name, sunaddr, addr_len);
1171 	addr->len = addr_len;
1172 	addr->hash = hash ^ sk->sk_type;
1173 	refcount_set(&addr->refcnt, 1);
1174 
1175 	if (sun_path[0])
1176 		err = unix_bind_bsd(sk, addr);
1177 	else
1178 		err = unix_bind_abstract(sk, addr);
1179 	if (err)
1180 		unix_release_addr(addr);
1181 	return err == -EEXIST ? -EADDRINUSE : err;
1182 }
1183 
1184 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1185 {
1186 	if (unlikely(sk1 == sk2) || !sk2) {
1187 		unix_state_lock(sk1);
1188 		return;
1189 	}
1190 	if (sk1 < sk2) {
1191 		unix_state_lock(sk1);
1192 		unix_state_lock_nested(sk2);
1193 	} else {
1194 		unix_state_lock(sk2);
1195 		unix_state_lock_nested(sk1);
1196 	}
1197 }
1198 
1199 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1200 {
1201 	if (unlikely(sk1 == sk2) || !sk2) {
1202 		unix_state_unlock(sk1);
1203 		return;
1204 	}
1205 	unix_state_unlock(sk1);
1206 	unix_state_unlock(sk2);
1207 }
1208 
1209 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1210 			      int alen, int flags)
1211 {
1212 	struct sock *sk = sock->sk;
1213 	struct net *net = sock_net(sk);
1214 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1215 	struct sock *other;
1216 	unsigned int hash;
1217 	int err;
1218 
1219 	err = -EINVAL;
1220 	if (alen < offsetofend(struct sockaddr, sa_family))
1221 		goto out;
1222 
1223 	if (addr->sa_family != AF_UNSPEC) {
1224 		err = unix_mkname(sunaddr, alen, &hash);
1225 		if (err < 0)
1226 			goto out;
1227 		alen = err;
1228 
1229 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1230 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1231 			goto out;
1232 
1233 restart:
1234 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1235 		if (!other)
1236 			goto out;
1237 
1238 		unix_state_double_lock(sk, other);
1239 
1240 		/* Apparently VFS overslept socket death. Retry. */
1241 		if (sock_flag(other, SOCK_DEAD)) {
1242 			unix_state_double_unlock(sk, other);
1243 			sock_put(other);
1244 			goto restart;
1245 		}
1246 
1247 		err = -EPERM;
1248 		if (!unix_may_send(sk, other))
1249 			goto out_unlock;
1250 
1251 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1252 		if (err)
1253 			goto out_unlock;
1254 
1255 		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1256 	} else {
1257 		/*
1258 		 *	1003.1g breaking connected state with AF_UNSPEC
1259 		 */
1260 		other = NULL;
1261 		unix_state_double_lock(sk, other);
1262 	}
1263 
1264 	/*
1265 	 * If it was connected, reconnect.
1266 	 */
1267 	if (unix_peer(sk)) {
1268 		struct sock *old_peer = unix_peer(sk);
1269 
1270 		unix_peer(sk) = other;
1271 		if (!other)
1272 			sk->sk_state = TCP_CLOSE;
1273 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1274 
1275 		unix_state_double_unlock(sk, other);
1276 
1277 		if (other != old_peer)
1278 			unix_dgram_disconnected(sk, old_peer);
1279 		sock_put(old_peer);
1280 	} else {
1281 		unix_peer(sk) = other;
1282 		unix_state_double_unlock(sk, other);
1283 	}
1284 
1285 	return 0;
1286 
1287 out_unlock:
1288 	unix_state_double_unlock(sk, other);
1289 	sock_put(other);
1290 out:
1291 	return err;
1292 }
1293 
1294 static long unix_wait_for_peer(struct sock *other, long timeo)
1295 	__releases(&unix_sk(other)->lock)
1296 {
1297 	struct unix_sock *u = unix_sk(other);
1298 	int sched;
1299 	DEFINE_WAIT(wait);
1300 
1301 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1302 
1303 	sched = !sock_flag(other, SOCK_DEAD) &&
1304 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1305 		unix_recvq_full(other);
1306 
1307 	unix_state_unlock(other);
1308 
1309 	if (sched)
1310 		timeo = schedule_timeout(timeo);
1311 
1312 	finish_wait(&u->peer_wait, &wait);
1313 	return timeo;
1314 }
1315 
1316 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1317 			       int addr_len, int flags)
1318 {
1319 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1320 	struct sock *sk = sock->sk;
1321 	struct net *net = sock_net(sk);
1322 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1323 	struct sock *newsk = NULL;
1324 	struct sock *other = NULL;
1325 	struct sk_buff *skb = NULL;
1326 	unsigned int hash;
1327 	int st;
1328 	int err;
1329 	long timeo;
1330 
1331 	err = unix_mkname(sunaddr, addr_len, &hash);
1332 	if (err < 0)
1333 		goto out;
1334 	addr_len = err;
1335 
1336 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1337 	    (err = unix_autobind(sock)) != 0)
1338 		goto out;
1339 
1340 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1341 
1342 	/* First of all, allocate resources.
1343 	   If we do it after the state is locked,
1344 	   we will have to recheck everything again in any case.
1345 	 */
1346 
1347 	/* create new sock for complete connection */
1348 	newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
1349 	if (IS_ERR(newsk)) {
1350 		err = PTR_ERR(newsk);
1351 		newsk = NULL;
1352 		goto out;
1353 	}
1354 
1355 	err = -ENOMEM;
1356 
1357 	/* Allocate skb for sending to listening sock */
1358 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1359 	if (skb == NULL)
1360 		goto out;
1361 
1362 restart:
1363 	/*  Find listening sock. */
1364 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1365 	if (!other)
1366 		goto out;
1367 
1368 	/* Latch state of peer */
1369 	unix_state_lock(other);
1370 
1371 	/* Apparently VFS overslept socket death. Retry. */
1372 	if (sock_flag(other, SOCK_DEAD)) {
1373 		unix_state_unlock(other);
1374 		sock_put(other);
1375 		goto restart;
1376 	}
1377 
1378 	err = -ECONNREFUSED;
1379 	if (other->sk_state != TCP_LISTEN)
1380 		goto out_unlock;
1381 	if (other->sk_shutdown & RCV_SHUTDOWN)
1382 		goto out_unlock;
1383 
1384 	if (unix_recvq_full(other)) {
1385 		err = -EAGAIN;
1386 		if (!timeo)
1387 			goto out_unlock;
1388 
1389 		timeo = unix_wait_for_peer(other, timeo);
1390 
1391 		err = sock_intr_errno(timeo);
1392 		if (signal_pending(current))
1393 			goto out;
1394 		sock_put(other);
1395 		goto restart;
1396 	}
1397 
1398 	/* Latch our state.
1399 
1400 	   This is a tricky place. We need to grab our state lock and cannot
1401 	   drop the lock on the peer. That is dangerous because a deadlock is
1402 	   possible. The connect-to-self case and simultaneous
1403 	   attempts to connect are eliminated by checking the socket
1404 	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1405 	   check this before attempting to grab the lock.
1406 
1407 	   Well, and we have to recheck the state after the socket is locked.
1408 	 */
1409 	st = sk->sk_state;
1410 
1411 	switch (st) {
1412 	case TCP_CLOSE:
1413 		/* This is ok... continue with connect */
1414 		break;
1415 	case TCP_ESTABLISHED:
1416 		/* Socket is already connected */
1417 		err = -EISCONN;
1418 		goto out_unlock;
1419 	default:
1420 		err = -EINVAL;
1421 		goto out_unlock;
1422 	}
1423 
1424 	unix_state_lock_nested(sk);
1425 
1426 	if (sk->sk_state != st) {
1427 		unix_state_unlock(sk);
1428 		unix_state_unlock(other);
1429 		sock_put(other);
1430 		goto restart;
1431 	}
1432 
1433 	err = security_unix_stream_connect(sk, other, newsk);
1434 	if (err) {
1435 		unix_state_unlock(sk);
1436 		goto out_unlock;
1437 	}
1438 
1439 	/* The way is open! Quickly set all the necessary fields... */
1440 
1441 	sock_hold(sk);
1442 	unix_peer(newsk)	= sk;
1443 	newsk->sk_state		= TCP_ESTABLISHED;
1444 	newsk->sk_type		= sk->sk_type;
1445 	init_peercred(newsk);
1446 	newu = unix_sk(newsk);
1447 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1448 	otheru = unix_sk(other);
1449 
1450 	/* copy address information from listening to new sock
1451 	 *
1452 	 * The contents of *(otheru->addr) and otheru->path
1453 	 * are seen fully set up here, since we have found
1454 	 * otheru in hash under unix_table_lock.  Insertion
1455 	 * into the hash chain we'd found it in had been done
1456 	 * in an earlier critical area protected by unix_table_lock,
1457 	 * the same one where we'd set *(otheru->addr) contents,
1458 	 * as well as otheru->path and otheru->addr itself.
1459 	 *
1460 	 * Using smp_store_release() here to set newu->addr
1461 	 * is enough to make those stores, as well as stores
1462 	 * to newu->path visible to anyone who gets newu->addr
1463 	 * by smp_load_acquire().  IOW, the same guarantees
1464 	 * as for unix_sock instances bound in unix_bind() or
1465 	 * in unix_autobind().
1466 	 */
1467 	if (otheru->path.dentry) {
1468 		path_get(&otheru->path);
1469 		newu->path = otheru->path;
1470 	}
1471 	refcount_inc(&otheru->addr->refcnt);
1472 	smp_store_release(&newu->addr, otheru->addr);
1473 
1474 	/* Set credentials */
1475 	copy_peercred(sk, other);
1476 
1477 	sock->state	= SS_CONNECTED;
1478 	sk->sk_state	= TCP_ESTABLISHED;
1479 	sock_hold(newsk);
1480 
1481 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1482 	unix_peer(sk)	= newsk;
1483 
1484 	unix_state_unlock(sk);
1485 
1486 	/* take ten and send info to listening sock */
1487 	spin_lock(&other->sk_receive_queue.lock);
1488 	__skb_queue_tail(&other->sk_receive_queue, skb);
1489 	spin_unlock(&other->sk_receive_queue.lock);
1490 	unix_state_unlock(other);
1491 	other->sk_data_ready(other);
1492 	sock_put(other);
1493 	return 0;
1494 
1495 out_unlock:
1496 	if (other)
1497 		unix_state_unlock(other);
1498 
1499 out:
1500 	kfree_skb(skb);
1501 	if (newsk)
1502 		unix_release_sock(newsk, 0);
1503 	if (other)
1504 		sock_put(other);
1505 	return err;
1506 }
1507 
1508 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1509 {
1510 	struct sock *ska = socka->sk, *skb = sockb->sk;
1511 
1512 	/* Join our sockets back to back */
1513 	sock_hold(ska);
1514 	sock_hold(skb);
1515 	unix_peer(ska) = skb;
1516 	unix_peer(skb) = ska;
1517 	init_peercred(ska);
1518 	init_peercred(skb);
1519 
1520 	ska->sk_state = TCP_ESTABLISHED;
1521 	skb->sk_state = TCP_ESTABLISHED;
1522 	socka->state  = SS_CONNECTED;
1523 	sockb->state  = SS_CONNECTED;
1524 	return 0;
1525 }
1526 
1527 static void unix_sock_inherit_flags(const struct socket *old,
1528 				    struct socket *new)
1529 {
1530 	if (test_bit(SOCK_PASSCRED, &old->flags))
1531 		set_bit(SOCK_PASSCRED, &new->flags);
1532 	if (test_bit(SOCK_PASSSEC, &old->flags))
1533 		set_bit(SOCK_PASSSEC, &new->flags);
1534 }
1535 
1536 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1537 		       bool kern)
1538 {
1539 	struct sock *sk = sock->sk;
1540 	struct sock *tsk;
1541 	struct sk_buff *skb;
1542 	int err;
1543 
1544 	err = -EOPNOTSUPP;
1545 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1546 		goto out;
1547 
1548 	err = -EINVAL;
1549 	if (sk->sk_state != TCP_LISTEN)
1550 		goto out;
1551 
1552 	/* If the socket state is TCP_LISTEN it cannot change (for now...),
1553 	 * so no locks are necessary.
1554 	 */
1555 
1556 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1557 	if (!skb) {
1558 		/* This means receive shutdown. */
1559 		if (err == 0)
1560 			err = -EINVAL;
1561 		goto out;
1562 	}
1563 
1564 	tsk = skb->sk;
1565 	skb_free_datagram(sk, skb);
1566 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1567 
1568 	/* attach accepted sock to socket */
1569 	unix_state_lock(tsk);
1570 	newsock->state = SS_CONNECTED;
1571 	unix_sock_inherit_flags(sock, newsock);
1572 	sock_graft(tsk, newsock);
1573 	unix_state_unlock(tsk);
1574 	return 0;
1575 
1576 out:
1577 	return err;
1578 }
1579 
1580 
1581 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1582 {
1583 	struct sock *sk = sock->sk;
1584 	struct unix_address *addr;
1585 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1586 	int err = 0;
1587 
1588 	if (peer) {
1589 		sk = unix_peer_get(sk);
1590 
1591 		err = -ENOTCONN;
1592 		if (!sk)
1593 			goto out;
1594 		err = 0;
1595 	} else {
1596 		sock_hold(sk);
1597 	}
1598 
1599 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1600 	if (!addr) {
1601 		sunaddr->sun_family = AF_UNIX;
1602 		sunaddr->sun_path[0] = 0;
1603 		err = sizeof(short);
1604 	} else {
1605 		err = addr->len;
1606 		memcpy(sunaddr, addr->name, addr->len);
1607 	}
1608 	sock_put(sk);
1609 out:
1610 	return err;
1611 }
1612 
1613 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1614 {
1615 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1616 
1617 	/*
1618 	 * Garbage collection of unix sockets starts by selecting a set of
1619 	 * candidate sockets which have reference only from being in flight
1620 	 * (total_refs == inflight_refs).  This condition is checked once during
1621 	 * the candidate collection phase, and candidates are marked as such, so
1622 	 * that non-candidates can later be ignored.  While inflight_refs is
1623 	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1624 	 * is an instantaneous decision.
1625 	 *
1626 	 * Once a candidate, however, the socket must not be reinstalled into a
1627 	 * file descriptor while the garbage collection is in progress.
1628 	 *
1629 	 * If the above conditions are met, then the directed graph of
1630 	 * candidates (*) does not change while unix_gc_lock is held.
1631 	 *
1632 	 * Any operation that changes the file count through file descriptors
1633 	 * (dup, close, sendmsg) does not change the graph since candidates are
1634 	 * not installed in fds.
1635 	 *
1636 	 * Dequeuing a candidate via recvmsg would install it into an fd, but
1637 	 * that takes unix_gc_lock to decrement the inflight count, so it's
1638 	 * serialized with garbage collection.
1639 	 *
1640 	 * MSG_PEEK is special in that it does not change the inflight count,
1641 	 * yet does install the socket into an fd.  The following lock/unlock
1642 	 * pair is to ensure serialization with garbage collection.  It must be
1643 	 * done between incrementing the file count and installing the file into
1644 	 * an fd.
1645 	 *
1646 	 * If garbage collection starts after the barrier provided by the
1647 	 * lock/unlock, then it will see the elevated refcount and not mark this
1648 	 * as a candidate.  If a garbage collection is already in progress
1649 	 * before the file count was incremented, then the lock/unlock pair will
1650 	 * ensure that garbage collection is finished before progressing to
1651 	 * installing the fd.
1652 	 *
1653 	 * (*) A -> B where B is on the queue of A or B is on the queue of C
1654 	 * which is on the queue of listening socket A.
1655 	 */
1656 	spin_lock(&unix_gc_lock);
1657 	spin_unlock(&unix_gc_lock);
1658 }
1659 
1660 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1661 {
1662 	int err = 0;
1663 
1664 	UNIXCB(skb).pid  = get_pid(scm->pid);
1665 	UNIXCB(skb).uid = scm->creds.uid;
1666 	UNIXCB(skb).gid = scm->creds.gid;
1667 	UNIXCB(skb).fp = NULL;
1668 	unix_get_secdata(scm, skb);
1669 	if (scm->fp && send_fds)
1670 		err = unix_attach_fds(scm, skb);
1671 
1672 	skb->destructor = unix_destruct_scm;
1673 	return err;
1674 }
1675 
1676 static bool unix_passcred_enabled(const struct socket *sock,
1677 				  const struct sock *other)
1678 {
1679 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1680 	       !other->sk_socket ||
1681 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1682 }
1683 
1684 /*
1685  * Some apps rely on write() giving SCM_CREDENTIALS.
1686  * We include credentials if the source or destination socket
1687  * asserted SOCK_PASSCRED.
1688  */
1689 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1690 			    const struct sock *other)
1691 {
1692 	if (UNIXCB(skb).pid)
1693 		return;
1694 	if (unix_passcred_enabled(sock, other)) {
1695 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1696 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1697 	}
1698 }
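/*
 * A minimal userspace sketch of the receiving side of the above (assumes
 * <sys/socket.h>; fd is a connected AF_UNIX socket and msg has already been
 * filled in by recvmsg() with msg_control space provided):
 *
 *	int on = 1;
 *	struct ucred peer;
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// ... recvmsg(fd, &msg, 0) ...
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			memcpy(&peer, CMSG_DATA(cmsg), sizeof(peer));
 */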
1699 
1700 static int maybe_init_creds(struct scm_cookie *scm,
1701 			    struct socket *socket,
1702 			    const struct sock *other)
1703 {
1704 	int err;
1705 	struct msghdr msg = { .msg_controllen = 0 };
1706 
1707 	err = scm_send(socket, &msg, scm, false);
1708 	if (err)
1709 		return err;
1710 
1711 	if (unix_passcred_enabled(socket, other)) {
1712 		scm->pid = get_pid(task_tgid(current));
1713 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1714 	}
1715 	return err;
1716 }
1717 
1718 static bool unix_skb_scm_eq(struct sk_buff *skb,
1719 			    struct scm_cookie *scm)
1720 {
1721 	const struct unix_skb_parms *u = &UNIXCB(skb);
1722 
1723 	return u->pid == scm->pid &&
1724 	       uid_eq(u->uid, scm->creds.uid) &&
1725 	       gid_eq(u->gid, scm->creds.gid) &&
1726 	       unix_secdata_eq(scm, skb);
1727 }
1728 
1729 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1730 {
1731 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1732 	struct unix_sock *u = unix_sk(sk);
1733 
1734 	if (unlikely(fp && fp->count))
1735 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1736 }
1737 
1738 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1739 {
1740 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1741 	struct unix_sock *u = unix_sk(sk);
1742 
1743 	if (unlikely(fp && fp->count))
1744 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1745 }
1746 
1747 /*
1748  *	Send AF_UNIX data.
1749  */
1750 
1751 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1752 			      size_t len)
1753 {
1754 	struct sock *sk = sock->sk;
1755 	struct net *net = sock_net(sk);
1756 	struct unix_sock *u = unix_sk(sk);
1757 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1758 	struct sock *other = NULL;
1759 	int namelen = 0; /* fake GCC */
1760 	int err;
1761 	unsigned int hash;
1762 	struct sk_buff *skb;
1763 	long timeo;
1764 	struct scm_cookie scm;
1765 	int data_len = 0;
1766 	int sk_locked;
1767 
1768 	wait_for_unix_gc();
1769 	err = scm_send(sock, msg, &scm, false);
1770 	if (err < 0)
1771 		return err;
1772 
1773 	err = -EOPNOTSUPP;
1774 	if (msg->msg_flags&MSG_OOB)
1775 		goto out;
1776 
1777 	if (msg->msg_namelen) {
1778 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1779 		if (err < 0)
1780 			goto out;
1781 		namelen = err;
1782 	} else {
1783 		sunaddr = NULL;
1784 		err = -ENOTCONN;
1785 		other = unix_peer_get(sk);
1786 		if (!other)
1787 			goto out;
1788 	}
1789 
1790 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1791 	    && (err = unix_autobind(sock)) != 0)
1792 		goto out;
1793 
1794 	err = -EMSGSIZE;
1795 	if (len > sk->sk_sndbuf - 32)
1796 		goto out;
1797 
1798 	if (len > SKB_MAX_ALLOC) {
1799 		data_len = min_t(size_t,
1800 				 len - SKB_MAX_ALLOC,
1801 				 MAX_SKB_FRAGS * PAGE_SIZE);
1802 		data_len = PAGE_ALIGN(data_len);
1803 
1804 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1805 	}
1806 
1807 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1808 				   msg->msg_flags & MSG_DONTWAIT, &err,
1809 				   PAGE_ALLOC_COSTLY_ORDER);
1810 	if (skb == NULL)
1811 		goto out;
1812 
1813 	err = unix_scm_to_skb(&scm, skb, true);
1814 	if (err < 0)
1815 		goto out_free;
1816 
1817 	skb_put(skb, len - data_len);
1818 	skb->data_len = data_len;
1819 	skb->len = len;
1820 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1821 	if (err)
1822 		goto out_free;
1823 
1824 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1825 
1826 restart:
1827 	if (!other) {
1828 		err = -ECONNRESET;
1829 		if (sunaddr == NULL)
1830 			goto out_free;
1831 
1832 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1833 					hash, &err);
1834 		if (other == NULL)
1835 			goto out_free;
1836 	}
1837 
1838 	if (sk_filter(other, skb) < 0) {
1839 		/* Toss the packet but do not return any error to the sender */
1840 		err = len;
1841 		goto out_free;
1842 	}
1843 
1844 	sk_locked = 0;
1845 	unix_state_lock(other);
1846 restart_locked:
1847 	err = -EPERM;
1848 	if (!unix_may_send(sk, other))
1849 		goto out_unlock;
1850 
1851 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1852 		/*
1853 		 *	Check with 1003.1g - what should
1854 		 *	a datagram error be?
1855 		 */
1856 		unix_state_unlock(other);
1857 		sock_put(other);
1858 
1859 		if (!sk_locked)
1860 			unix_state_lock(sk);
1861 
1862 		err = 0;
1863 		if (unix_peer(sk) == other) {
1864 			unix_peer(sk) = NULL;
1865 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1866 
1867 			unix_state_unlock(sk);
1868 
1869 			sk->sk_state = TCP_CLOSE;
1870 			unix_dgram_disconnected(sk, other);
1871 			sock_put(other);
1872 			err = -ECONNREFUSED;
1873 		} else {
1874 			unix_state_unlock(sk);
1875 		}
1876 
1877 		other = NULL;
1878 		if (err)
1879 			goto out_free;
1880 		goto restart;
1881 	}
1882 
1883 	err = -EPIPE;
1884 	if (other->sk_shutdown & RCV_SHUTDOWN)
1885 		goto out_unlock;
1886 
1887 	if (sk->sk_type != SOCK_SEQPACKET) {
1888 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1889 		if (err)
1890 			goto out_unlock;
1891 	}
1892 
1893 	/* other == sk && unix_peer(other) != sk if
1894 	 * - unix_peer(sk) == NULL, destination address bound to sk
1895 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1896 	 */
1897 	if (other != sk &&
1898 	    unlikely(unix_peer(other) != sk &&
1899 	    unix_recvq_full_lockless(other))) {
1900 		if (timeo) {
1901 			timeo = unix_wait_for_peer(other, timeo);
1902 
1903 			err = sock_intr_errno(timeo);
1904 			if (signal_pending(current))
1905 				goto out_free;
1906 
1907 			goto restart;
1908 		}
1909 
1910 		if (!sk_locked) {
1911 			unix_state_unlock(other);
1912 			unix_state_double_lock(sk, other);
1913 		}
1914 
1915 		if (unix_peer(sk) != other ||
1916 		    unix_dgram_peer_wake_me(sk, other)) {
1917 			err = -EAGAIN;
1918 			sk_locked = 1;
1919 			goto out_unlock;
1920 		}
1921 
1922 		if (!sk_locked) {
1923 			sk_locked = 1;
1924 			goto restart_locked;
1925 		}
1926 	}
1927 
1928 	if (unlikely(sk_locked))
1929 		unix_state_unlock(sk);
1930 
1931 	if (sock_flag(other, SOCK_RCVTSTAMP))
1932 		__net_timestamp(skb);
1933 	maybe_add_creds(skb, sock, other);
1934 	scm_stat_add(other, skb);
1935 	skb_queue_tail(&other->sk_receive_queue, skb);
1936 	unix_state_unlock(other);
1937 	other->sk_data_ready(other);
1938 	sock_put(other);
1939 	scm_destroy(&scm);
1940 	return len;
1941 
1942 out_unlock:
1943 	if (sk_locked)
1944 		unix_state_unlock(sk);
1945 	unix_state_unlock(other);
1946 out_free:
1947 	kfree_skb(skb);
1948 out:
1949 	if (other)
1950 		sock_put(other);
1951 	scm_destroy(&scm);
1952 	return err;
1953 }
1954 
1955 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1956  * bytes, with a minimum of a full page.
1957  */
1958 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1959 
1960 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
1961 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
1962 {
1963 	struct unix_sock *ousk = unix_sk(other);
1964 	struct sk_buff *skb;
1965 	int err = 0;
1966 
1967 	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1968 
1969 	if (!skb)
1970 		return err;
1971 
1972 	skb_put(skb, 1);
1973 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1974 
1975 	if (err) {
1976 		kfree_skb(skb);
1977 		return err;
1978 	}
1979 
1980 	unix_state_lock(other);
1981 
1982 	if (sock_flag(other, SOCK_DEAD) ||
1983 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
1984 		unix_state_unlock(other);
1985 		kfree_skb(skb);
1986 		return -EPIPE;
1987 	}
1988 
1989 	maybe_add_creds(skb, sock, other);
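	/* Take a second reference: one is held by the receive queue, one by
	 * ousk->oob_skb.
	 */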
1990 	skb_get(skb);
1991 
1992 	if (ousk->oob_skb)
1993 		consume_skb(ousk->oob_skb);
1994 
1995 	ousk->oob_skb = skb;
1996 
1997 	scm_stat_add(other, skb);
1998 	skb_queue_tail(&other->sk_receive_queue, skb);
1999 	sk_send_sigurg(other);
2000 	unix_state_unlock(other);
2001 	other->sk_data_ready(other);
2002 
2003 	return err;
2004 }
2005 #endif
2006 
2007 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2008 			       size_t len)
2009 {
2010 	struct sock *sk = sock->sk;
2011 	struct sock *other = NULL;
2012 	int err, size;
2013 	struct sk_buff *skb;
2014 	int sent = 0;
2015 	struct scm_cookie scm;
2016 	bool fds_sent = false;
2017 	int data_len;
2018 
2019 	wait_for_unix_gc();
2020 	err = scm_send(sock, msg, &scm, false);
2021 	if (err < 0)
2022 		return err;
2023 
2024 	err = -EOPNOTSUPP;
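	/* With MSG_OOB, the last byte of the payload is held back here and
	 * sent as out-of-band data via queue_oob() once the in-band data has
	 * been queued below.
	 */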
2025 	if (msg->msg_flags & MSG_OOB) {
2026 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2027 		if (len)
2028 			len--;
2029 		else
2030 #endif
2031 			goto out_err;
2032 	}
2033 
2034 	if (msg->msg_namelen) {
2035 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2036 		goto out_err;
2037 	} else {
2038 		err = -ENOTCONN;
2039 		other = unix_peer(sk);
2040 		if (!other)
2041 			goto out_err;
2042 	}
2043 
2044 	if (sk->sk_shutdown & SEND_SHUTDOWN)
2045 		goto pipe_err;
2046 
2047 	while (sent < len) {
2048 		size = len - sent;
2049 
2050 		/* Keep two messages in the pipe so it schedules better */
2051 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2052 
2053 		/* allow fallback to order-0 allocations */
2054 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2055 
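		/* Data that does not fit in the linear head goes into page
		 * fragments, rounded up to whole pages but never beyond the
		 * chunk size itself.
		 */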
2056 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2057 
2058 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2059 
2060 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2061 					   msg->msg_flags & MSG_DONTWAIT, &err,
2062 					   get_order(UNIX_SKB_FRAGS_SZ));
2063 		if (!skb)
2064 			goto out_err;
2065 
2066 		/* Only send the fds in the first buffer */
2067 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2068 		if (err < 0) {
2069 			kfree_skb(skb);
2070 			goto out_err;
2071 		}
2072 		fds_sent = true;
2073 
2074 		skb_put(skb, size - data_len);
2075 		skb->data_len = data_len;
2076 		skb->len = size;
2077 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2078 		if (err) {
2079 			kfree_skb(skb);
2080 			goto out_err;
2081 		}
2082 
2083 		unix_state_lock(other);
2084 
2085 		if (sock_flag(other, SOCK_DEAD) ||
2086 		    (other->sk_shutdown & RCV_SHUTDOWN))
2087 			goto pipe_err_free;
2088 
2089 		maybe_add_creds(skb, sock, other);
2090 		scm_stat_add(other, skb);
2091 		skb_queue_tail(&other->sk_receive_queue, skb);
2092 		unix_state_unlock(other);
2093 		other->sk_data_ready(other);
2094 		sent += size;
2095 	}
2096 
2097 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2098 	if (msg->msg_flags & MSG_OOB) {
2099 		err = queue_oob(sock, msg, other);
2100 		if (err)
2101 			goto out_err;
2102 		sent++;
2103 	}
2104 #endif
2105 
2106 	scm_destroy(&scm);
2107 
2108 	return sent;
2109 
2110 pipe_err_free:
2111 	unix_state_unlock(other);
2112 	kfree_skb(skb);
2113 pipe_err:
2114 	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
2115 		send_sig(SIGPIPE, current, 0);
2116 	err = -EPIPE;
2117 out_err:
2118 	scm_destroy(&scm);
2119 	return sent ? : err;
2120 }
2121 
2122 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2123 				    int offset, size_t size, int flags)
2124 {
2125 	int err;
2126 	bool send_sigpipe = false;
2127 	bool init_scm = true;
2128 	struct scm_cookie scm;
2129 	struct sock *other, *sk = socket->sk;
2130 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2131 
2132 	if (flags & MSG_OOB)
2133 		return -EOPNOTSUPP;
2134 
2135 	other = unix_peer(sk);
2136 	if (!other || sk->sk_state != TCP_ESTABLISHED)
2137 		return -ENOTCONN;
2138 
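	/* This block is only entered via 'goto alloc_skb' below: it drops
	 * both the peer's state lock and the iolock before allocating a
	 * fresh skb, then falls through to retake the locks and retry.
	 */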
2139 	if (false) {
2140 alloc_skb:
2141 		unix_state_unlock(other);
2142 		mutex_unlock(&unix_sk(other)->iolock);
2143 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2144 					      &err, 0);
2145 		if (!newskb)
2146 			goto err;
2147 	}
2148 
2149 	/* We must acquire the iolock as we modify skbs that are already
2150 	 * present in the sk_receive_queue and mess with skb->len.
2151 	 */
2152 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
2153 	if (err) {
2154 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2155 		goto err;
2156 	}
2157 
2158 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
2159 		err = -EPIPE;
2160 		send_sigpipe = true;
2161 		goto err_unlock;
2162 	}
2163 
2164 	unix_state_lock(other);
2165 
2166 	if (sock_flag(other, SOCK_DEAD) ||
2167 	    other->sk_shutdown & RCV_SHUTDOWN) {
2168 		err = -EPIPE;
2169 		send_sigpipe = true;
2170 		goto err_state_unlock;
2171 	}
2172 
2173 	if (init_scm) {
2174 		err = maybe_init_creds(&scm, socket, other);
2175 		if (err)
2176 			goto err_state_unlock;
2177 		init_scm = false;
2178 	}
2179 
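	/* Try to append the page to the skb at the tail of the peer's queue;
	 * this is only done when its credentials match ours
	 * (unix_skb_scm_eq()), otherwise use the freshly allocated newskb.
	 */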
2180 	skb = skb_peek_tail(&other->sk_receive_queue);
2181 	if (tail && tail == skb) {
2182 		skb = newskb;
2183 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2184 		if (newskb) {
2185 			skb = newskb;
2186 		} else {
2187 			tail = skb;
2188 			goto alloc_skb;
2189 		}
2190 	} else if (newskb) {
2191 		/* This is the fast path: the existing tail skb will be used,
2192 		 * so drop the spare allocation. The newskb check is not
2193 		 * strictly needed since consume_skb(NULL) does no harm.
2194 		 */
2195 		consume_skb(newskb);
2196 		newskb = NULL;
2197 	}
2198 
2199 	if (skb_append_pagefrags(skb, page, offset, size)) {
2200 		tail = skb;
2201 		goto alloc_skb;
2202 	}
2203 
2204 	skb->len += size;
2205 	skb->data_len += size;
2206 	skb->truesize += size;
2207 	refcount_add(size, &sk->sk_wmem_alloc);
2208 
2209 	if (newskb) {
2210 		err = unix_scm_to_skb(&scm, skb, false);
2211 		if (err)
2212 			goto err_state_unlock;
2213 		spin_lock(&other->sk_receive_queue.lock);
2214 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2215 		spin_unlock(&other->sk_receive_queue.lock);
2216 	}
2217 
2218 	unix_state_unlock(other);
2219 	mutex_unlock(&unix_sk(other)->iolock);
2220 
2221 	other->sk_data_ready(other);
2222 	scm_destroy(&scm);
2223 	return size;
2224 
2225 err_state_unlock:
2226 	unix_state_unlock(other);
2227 err_unlock:
2228 	mutex_unlock(&unix_sk(other)->iolock);
2229 err:
2230 	kfree_skb(newskb);
2231 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2232 		send_sig(SIGPIPE, current, 0);
2233 	if (!init_scm)
2234 		scm_destroy(&scm);
2235 	return err;
2236 }
2237 
2238 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2239 				  size_t len)
2240 {
2241 	int err;
2242 	struct sock *sk = sock->sk;
2243 
2244 	err = sock_error(sk);
2245 	if (err)
2246 		return err;
2247 
2248 	if (sk->sk_state != TCP_ESTABLISHED)
2249 		return -ENOTCONN;
2250 
2251 	if (msg->msg_namelen)
2252 		msg->msg_namelen = 0;
2253 
2254 	return unix_dgram_sendmsg(sock, msg, len);
2255 }
2256 
2257 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2258 				  size_t size, int flags)
2259 {
2260 	struct sock *sk = sock->sk;
2261 
2262 	if (sk->sk_state != TCP_ESTABLISHED)
2263 		return -ENOTCONN;
2264 
2265 	return unix_dgram_recvmsg(sock, msg, size, flags);
2266 }
2267 
2268 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2269 {
2270 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2271 
2272 	if (addr) {
2273 		msg->msg_namelen = addr->len;
2274 		memcpy(msg->msg_name, addr->name, addr->len);
2275 	}
2276 }
2277 
2278 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2279 			 int flags)
2280 {
2281 	struct scm_cookie scm;
2282 	struct socket *sock = sk->sk_socket;
2283 	struct unix_sock *u = unix_sk(sk);
2284 	struct sk_buff *skb, *last;
2285 	long timeo;
2286 	int skip;
2287 	int err;
2288 
2289 	err = -EOPNOTSUPP;
2290 	if (flags & MSG_OOB)
2291 		goto out;
2292 
2293 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2294 
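	/* Loop until a datagram is dequeued, the timeout expires, or an error
	 * other than -EAGAIN occurs; u->iolock is held only while the queue
	 * is examined.
	 */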
2295 	do {
2296 		mutex_lock(&u->iolock);
2297 
2298 		skip = sk_peek_offset(sk, flags);
2299 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2300 					      &skip, &err, &last);
2301 		if (skb) {
2302 			if (!(flags & MSG_PEEK))
2303 				scm_stat_del(sk, skb);
2304 			break;
2305 		}
2306 
2307 		mutex_unlock(&u->iolock);
2308 
2309 		if (err != -EAGAIN)
2310 			break;
2311 	} while (timeo &&
2312 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2313 					      &err, &timeo, last));
2314 
2315 	if (!skb) { /* implies iolock unlocked */
2316 		unix_state_lock(sk);
2317 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2318 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2319 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2320 			err = 0;
2321 		unix_state_unlock(sk);
2322 		goto out;
2323 	}
2324 
2325 	if (wq_has_sleeper(&u->peer_wait))
2326 		wake_up_interruptible_sync_poll(&u->peer_wait,
2327 						EPOLLOUT | EPOLLWRNORM |
2328 						EPOLLWRBAND);
2329 
2330 	if (msg->msg_name)
2331 		unix_copy_addr(msg, skb->sk);
2332 
2333 	if (size > skb->len - skip)
2334 		size = skb->len - skip;
2335 	else if (size < skb->len - skip)
2336 		msg->msg_flags |= MSG_TRUNC;
2337 
2338 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2339 	if (err)
2340 		goto out_free;
2341 
2342 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2343 		__sock_recv_timestamp(msg, sk, skb);
2344 
2345 	memset(&scm, 0, sizeof(scm));
2346 
2347 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2348 	unix_set_secdata(&scm, skb);
2349 
2350 	if (!(flags & MSG_PEEK)) {
2351 		if (UNIXCB(skb).fp)
2352 			unix_detach_fds(&scm, skb);
2353 
2354 		sk_peek_offset_bwd(sk, skb->len);
2355 	} else {
2356 		/* It is questionable: on PEEK we could:
2357 		   - do not return fds - good, but too simple 8)
2358 		   - return fds, and do not return them on read (old strategy,
2359 		     apparently wrong)
2360 		   - clone fds (I chose it for now, it is the most universal
2361 		     solution)
2362 
2363 		   POSIX 1003.1g does not actually define this clearly
2364 		   at all. POSIX 1003.1g doesn't define a lot of things
2365 		   clearly however!
2366 
2367 		*/
2368 
2369 		sk_peek_offset_fwd(sk, size);
2370 
2371 		if (UNIXCB(skb).fp)
2372 			unix_peek_fds(&scm, skb);
2373 	}
2374 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2375 
2376 	scm_recv(sock, msg, &scm, flags);
2377 
2378 out_free:
2379 	skb_free_datagram(sk, skb);
2380 	mutex_unlock(&u->iolock);
2381 out:
2382 	return err;
2383 }
2384 
2385 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2386 			      int flags)
2387 {
2388 	struct sock *sk = sock->sk;
2389 
2390 #ifdef CONFIG_BPF_SYSCALL
2391 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2392 
2393 	if (prot != &unix_dgram_proto)
2394 		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2395 					    flags & ~MSG_DONTWAIT, NULL);
2396 #endif
2397 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2398 }
2399 
2400 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2401 			  sk_read_actor_t recv_actor)
2402 {
2403 	int copied = 0;
2404 
2405 	while (1) {
2406 		struct unix_sock *u = unix_sk(sk);
2407 		struct sk_buff *skb;
2408 		int used, err;
2409 
2410 		mutex_lock(&u->iolock);
2411 		skb = skb_recv_datagram(sk, 0, 1, &err);
2412 		mutex_unlock(&u->iolock);
2413 		if (!skb)
2414 			return err;
2415 
2416 		used = recv_actor(desc, skb, 0, skb->len);
2417 		if (used <= 0) {
2418 			if (!copied)
2419 				copied = used;
2420 			kfree_skb(skb);
2421 			break;
2422 		} else if (used <= skb->len) {
2423 			copied += used;
2424 		}
2425 
2426 		kfree_skb(skb);
2427 		if (!desc->count)
2428 			break;
2429 	}
2430 
2431 	return copied;
2432 }
2433 
2434 /*
2435  *	Sleep until more data has arrived. But check for races.
2436  */
2437 static long unix_stream_data_wait(struct sock *sk, long timeo,
2438 				  struct sk_buff *last, unsigned int last_len,
2439 				  bool freezable)
2440 {
2441 	struct sk_buff *tail;
2442 	DEFINE_WAIT(wait);
2443 
2444 	unix_state_lock(sk);
2445 
2446 	for (;;) {
2447 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2448 
2449 		tail = skb_peek_tail(&sk->sk_receive_queue);
2450 		if (tail != last ||
2451 		    (tail && tail->len != last_len) ||
2452 		    sk->sk_err ||
2453 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2454 		    signal_pending(current) ||
2455 		    !timeo)
2456 			break;
2457 
2458 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2459 		unix_state_unlock(sk);
2460 		if (freezable)
2461 			timeo = freezable_schedule_timeout(timeo);
2462 		else
2463 			timeo = schedule_timeout(timeo);
2464 		unix_state_lock(sk);
2465 
2466 		if (sock_flag(sk, SOCK_DEAD))
2467 			break;
2468 
2469 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2470 	}
2471 
2472 	finish_wait(sk_sleep(sk), &wait);
2473 	unix_state_unlock(sk);
2474 	return timeo;
2475 }
2476 
2477 static unsigned int unix_skb_len(const struct sk_buff *skb)
2478 {
2479 	return skb->len - UNIXCB(skb).consumed;
2480 }
2481 
2482 struct unix_stream_read_state {
2483 	int (*recv_actor)(struct sk_buff *, int, int,
2484 			  struct unix_stream_read_state *);
2485 	struct socket *socket;
2486 	struct msghdr *msg;
2487 	struct pipe_inode_info *pipe;
2488 	size_t size;
2489 	int flags;
2490 	unsigned int splice_flags;
2491 };
2492 
2493 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2494 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2495 {
2496 	struct socket *sock = state->socket;
2497 	struct sock *sk = sock->sk;
2498 	struct unix_sock *u = unix_sk(sk);
2499 	int chunk = 1;
2500 	struct sk_buff *oob_skb;
2501 
2502 	mutex_lock(&u->iolock);
2503 	unix_state_lock(sk);
2504 
2505 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2506 		unix_state_unlock(sk);
2507 		mutex_unlock(&u->iolock);
2508 		return -EINVAL;
2509 	}
2510 
2511 	oob_skb = u->oob_skb;
2512 
2513 	if (!(state->flags & MSG_PEEK)) {
2514 		u->oob_skb = NULL;
2515 	}
2516 
2517 	unix_state_unlock(sk);
2518 
2519 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2520 
2521 	if (!(state->flags & MSG_PEEK)) {
2522 		UNIXCB(oob_skb).consumed += 1;
2523 		kfree_skb(oob_skb);
2524 	}
2525 
2526 	mutex_unlock(&u->iolock);
2527 
2528 	if (chunk < 0)
2529 		return -EFAULT;
2530 
2531 	state->msg->msg_flags |= MSG_OOB;
2532 	return 1;
2533 }
2534 
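/* Decide what to do with the skb at the head of the receive queue when an
 * out-of-band byte may be pending: consume empty skbs, stop a normal read at
 * the OOB boundary, and skip over the OOB skb unless SOCK_URGINLINE is set.
 * Returns the skb to read from next, or NULL.
 */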
2535 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2536 				  int flags, int copied)
2537 {
2538 	struct unix_sock *u = unix_sk(sk);
2539 
2540 	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2541 		skb_unlink(skb, &sk->sk_receive_queue);
2542 		consume_skb(skb);
2543 		skb = NULL;
2544 	} else {
2545 		if (skb == u->oob_skb) {
2546 			if (copied) {
2547 				skb = NULL;
2548 			} else if (sock_flag(sk, SOCK_URGINLINE)) {
2549 				if (!(flags & MSG_PEEK)) {
2550 					u->oob_skb = NULL;
2551 					consume_skb(skb);
2552 				}
2553 			} else if (!(flags & MSG_PEEK)) {
2554 				skb_unlink(skb, &sk->sk_receive_queue);
2555 				consume_skb(skb);
2556 				skb = skb_peek(&sk->sk_receive_queue);
2557 			}
2558 		}
2559 	}
2560 	return skb;
2561 }
2562 #endif
2563 
2564 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2565 				 sk_read_actor_t recv_actor)
2566 {
2567 	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2568 		return -ENOTCONN;
2569 
2570 	return unix_read_sock(sk, desc, recv_actor);
2571 }
2572 
2573 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2574 				    bool freezable)
2575 {
2576 	struct scm_cookie scm;
2577 	struct socket *sock = state->socket;
2578 	struct sock *sk = sock->sk;
2579 	struct unix_sock *u = unix_sk(sk);
2580 	int copied = 0;
2581 	int flags = state->flags;
2582 	int noblock = flags & MSG_DONTWAIT;
2583 	bool check_creds = false;
2584 	int target;
2585 	int err = 0;
2586 	long timeo;
2587 	int skip;
2588 	size_t size = state->size;
2589 	unsigned int last_len;
2590 
2591 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2592 		err = -EINVAL;
2593 		goto out;
2594 	}
2595 
2596 	if (unlikely(flags & MSG_OOB)) {
2597 		err = -EOPNOTSUPP;
2598 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2599 		err = unix_stream_recv_urg(state);
2600 #endif
2601 		goto out;
2602 	}
2603 
2604 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2605 	timeo = sock_rcvtimeo(sk, noblock);
2606 
2607 	memset(&scm, 0, sizeof(scm));
2608 
2609 	/* Lock the socket to prevent queue disordering
2610 	 * while we sleep copying data out to the msg.
2611 	 */
2612 	mutex_lock(&u->iolock);
2613 
2614 	skip = max(sk_peek_offset(sk, flags), 0);
2615 
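	/* Main receive loop: walk the queue, gluing consecutive skbs from the
	 * same writer (same scm credentials) until 'size' bytes are copied,
	 * the writer changes, or no more data is queued and at least
	 * 'target' bytes have been read.
	 */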
2616 	do {
2617 		int chunk;
2618 		bool drop_skb;
2619 		struct sk_buff *skb, *last;
2620 
2621 redo:
2622 		unix_state_lock(sk);
2623 		if (sock_flag(sk, SOCK_DEAD)) {
2624 			err = -ECONNRESET;
2625 			goto unlock;
2626 		}
2627 		last = skb = skb_peek(&sk->sk_receive_queue);
2628 		last_len = last ? last->len : 0;
2629 
2630 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2631 		if (skb) {
2632 			skb = manage_oob(skb, sk, flags, copied);
2633 			if (!skb) {
2634 				unix_state_unlock(sk);
2635 				if (copied)
2636 					break;
2637 				goto redo;
2638 			}
2639 		}
2640 #endif
2641 again:
2642 		if (skb == NULL) {
2643 			if (copied >= target)
2644 				goto unlock;
2645 
2646 			/*
2647 			 *	POSIX 1003.1g mandates this order.
2648 			 */
2649 
2650 			err = sock_error(sk);
2651 			if (err)
2652 				goto unlock;
2653 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2654 				goto unlock;
2655 
2656 			unix_state_unlock(sk);
2657 			if (!timeo) {
2658 				err = -EAGAIN;
2659 				break;
2660 			}
2661 
2662 			mutex_unlock(&u->iolock);
2663 
2664 			timeo = unix_stream_data_wait(sk, timeo, last,
2665 						      last_len, freezable);
2666 
2667 			if (signal_pending(current)) {
2668 				err = sock_intr_errno(timeo);
2669 				scm_destroy(&scm);
2670 				goto out;
2671 			}
2672 
2673 			mutex_lock(&u->iolock);
2674 			goto redo;
2675 unlock:
2676 			unix_state_unlock(sk);
2677 			break;
2678 		}
2679 
2680 		while (skip >= unix_skb_len(skb)) {
2681 			skip -= unix_skb_len(skb);
2682 			last = skb;
2683 			last_len = skb->len;
2684 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2685 			if (!skb)
2686 				goto again;
2687 		}
2688 
2689 		unix_state_unlock(sk);
2690 
2691 		if (check_creds) {
2692 			/* Never glue messages from different writers */
2693 			if (!unix_skb_scm_eq(skb, &scm))
2694 				break;
2695 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2696 			/* Copy credentials */
2697 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2698 			unix_set_secdata(&scm, skb);
2699 			check_creds = true;
2700 		}
2701 
2702 		/* Copy address just once */
2703 		if (state->msg && state->msg->msg_name) {
2704 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2705 					 state->msg->msg_name);
2706 			unix_copy_addr(state->msg, skb->sk);
2707 			sunaddr = NULL;
2708 		}
2709 
2710 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2711 		skb_get(skb);
2712 		chunk = state->recv_actor(skb, skip, chunk, state);
2713 		drop_skb = !unix_skb_len(skb);
2714 		/* skb is only safe to use if !drop_skb */
2715 		consume_skb(skb);
2716 		if (chunk < 0) {
2717 			if (copied == 0)
2718 				copied = -EFAULT;
2719 			break;
2720 		}
2721 		copied += chunk;
2722 		size -= chunk;
2723 
2724 		if (drop_skb) {
2725 			/* the skb was touched by a concurrent reader;
2726 			 * we should not expect anything from this skb
2727 			 * anymore and assume it invalid - we can be
2728 			 * sure it was dropped from the socket queue
2729 			 *
2730 			 * let's report a short read
2731 			 */
2732 			err = 0;
2733 			break;
2734 		}
2735 
2736 		/* Mark read part of skb as used */
2737 		if (!(flags & MSG_PEEK)) {
2738 			UNIXCB(skb).consumed += chunk;
2739 
2740 			sk_peek_offset_bwd(sk, chunk);
2741 
2742 			if (UNIXCB(skb).fp) {
2743 				scm_stat_del(sk, skb);
2744 				unix_detach_fds(&scm, skb);
2745 			}
2746 
2747 			if (unix_skb_len(skb))
2748 				break;
2749 
2750 			skb_unlink(skb, &sk->sk_receive_queue);
2751 			consume_skb(skb);
2752 
2753 			if (scm.fp)
2754 				break;
2755 		} else {
2756 			/* It is questionable, see note in unix_dgram_recvmsg.
2757 			 */
2758 			if (UNIXCB(skb).fp)
2759 				unix_peek_fds(&scm, skb);
2760 
2761 			sk_peek_offset_fwd(sk, chunk);
2762 
2763 			if (UNIXCB(skb).fp)
2764 				break;
2765 
2766 			skip = 0;
2767 			last = skb;
2768 			last_len = skb->len;
2769 			unix_state_lock(sk);
2770 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2771 			if (skb)
2772 				goto again;
2773 			unix_state_unlock(sk);
2774 			break;
2775 		}
2776 	} while (size);
2777 
2778 	mutex_unlock(&u->iolock);
2779 	if (state->msg)
2780 		scm_recv(sock, state->msg, &scm, flags);
2781 	else
2782 		scm_destroy(&scm);
2783 out:
2784 	return copied ? : err;
2785 }
2786 
2787 static int unix_stream_read_actor(struct sk_buff *skb,
2788 				  int skip, int chunk,
2789 				  struct unix_stream_read_state *state)
2790 {
2791 	int ret;
2792 
2793 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2794 				    state->msg, chunk);
2795 	return ret ?: chunk;
2796 }
2797 
2798 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2799 			  size_t size, int flags)
2800 {
2801 	struct unix_stream_read_state state = {
2802 		.recv_actor = unix_stream_read_actor,
2803 		.socket = sk->sk_socket,
2804 		.msg = msg,
2805 		.size = size,
2806 		.flags = flags
2807 	};
2808 
2809 	return unix_stream_read_generic(&state, true);
2810 }
2811 
2812 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2813 			       size_t size, int flags)
2814 {
2815 	struct unix_stream_read_state state = {
2816 		.recv_actor = unix_stream_read_actor,
2817 		.socket = sock,
2818 		.msg = msg,
2819 		.size = size,
2820 		.flags = flags
2821 	};
2822 
2823 #ifdef CONFIG_BPF_SYSCALL
2824 	struct sock *sk = sock->sk;
2825 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2826 
2827 	if (prot != &unix_stream_proto)
2828 		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2829 					    flags & ~MSG_DONTWAIT, NULL);
2830 #endif
2831 	return unix_stream_read_generic(&state, true);
2832 }
2833 
2834 static int unix_stream_splice_actor(struct sk_buff *skb,
2835 				    int skip, int chunk,
2836 				    struct unix_stream_read_state *state)
2837 {
2838 	return skb_splice_bits(skb, state->socket->sk,
2839 			       UNIXCB(skb).consumed + skip,
2840 			       state->pipe, chunk, state->splice_flags);
2841 }
2842 
2843 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2844 				       struct pipe_inode_info *pipe,
2845 				       size_t size, unsigned int flags)
2846 {
2847 	struct unix_stream_read_state state = {
2848 		.recv_actor = unix_stream_splice_actor,
2849 		.socket = sock,
2850 		.pipe = pipe,
2851 		.size = size,
2852 		.splice_flags = flags,
2853 	};
2854 
2855 	if (unlikely(*ppos))
2856 		return -ESPIPE;
2857 
2858 	if ((sock->file->f_flags & O_NONBLOCK) ||
2859 	    (flags & SPLICE_F_NONBLOCK))
2860 		state.flags = MSG_DONTWAIT;
2861 
2862 	return unix_stream_read_generic(&state, false);
2863 }
2864 
2865 static int unix_shutdown(struct socket *sock, int mode)
2866 {
2867 	struct sock *sk = sock->sk;
2868 	struct sock *other;
2869 
2870 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2871 		return -EINVAL;
2872 	/* This maps:
2873 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2874 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2875 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2876 	 */
2877 	++mode;
2878 
2879 	unix_state_lock(sk);
2880 	sk->sk_shutdown |= mode;
2881 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2882 	    mode == SHUTDOWN_MASK)
2883 		sk->sk_state = TCP_CLOSE;
2884 	other = unix_peer(sk);
2885 	if (other)
2886 		sock_hold(other);
2887 	unix_state_unlock(sk);
2888 	sk->sk_state_change(sk);
2889 
2890 	if (other &&
2891 	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2892 
2893 		int peer_mode = 0;
2894 		const struct proto *prot = READ_ONCE(other->sk_prot);
2895 
2896 		if (prot->unhash)
2897 			prot->unhash(other);
2898 		if (mode & RCV_SHUTDOWN)
2899 			peer_mode |= SEND_SHUTDOWN;
2900 		if (mode & SEND_SHUTDOWN)
2901 			peer_mode |= RCV_SHUTDOWN;
2902 		unix_state_lock(other);
2903 		other->sk_shutdown |= peer_mode;
2904 		unix_state_unlock(other);
2905 		other->sk_state_change(other);
2906 		if (peer_mode == SHUTDOWN_MASK)
2907 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2908 		else if (peer_mode & RCV_SHUTDOWN)
2909 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2910 	}
2911 	if (other)
2912 		sock_put(other);
2913 
2914 	return 0;
2915 }
2916 
2917 long unix_inq_len(struct sock *sk)
2918 {
2919 	struct sk_buff *skb;
2920 	long amount = 0;
2921 
2922 	if (sk->sk_state == TCP_LISTEN)
2923 		return -EINVAL;
2924 
2925 	spin_lock(&sk->sk_receive_queue.lock);
2926 	if (sk->sk_type == SOCK_STREAM ||
2927 	    sk->sk_type == SOCK_SEQPACKET) {
2928 		skb_queue_walk(&sk->sk_receive_queue, skb)
2929 			amount += unix_skb_len(skb);
2930 	} else {
2931 		skb = skb_peek(&sk->sk_receive_queue);
2932 		if (skb)
2933 			amount = skb->len;
2934 	}
2935 	spin_unlock(&sk->sk_receive_queue.lock);
2936 
2937 	return amount;
2938 }
2939 EXPORT_SYMBOL_GPL(unix_inq_len);
2940 
2941 long unix_outq_len(struct sock *sk)
2942 {
2943 	return sk_wmem_alloc_get(sk);
2944 }
2945 EXPORT_SYMBOL_GPL(unix_outq_len);
2946 
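/* SIOCUNIXFILE: return an O_PATH file descriptor for the filesystem object
 * this socket is bound to; requires CAP_NET_ADMIN over the socket's network
 * namespace.
 */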
2947 static int unix_open_file(struct sock *sk)
2948 {
2949 	struct path path;
2950 	struct file *f;
2951 	int fd;
2952 
2953 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2954 		return -EPERM;
2955 
2956 	if (!smp_load_acquire(&unix_sk(sk)->addr))
2957 		return -ENOENT;
2958 
2959 	path = unix_sk(sk)->path;
2960 	if (!path.dentry)
2961 		return -ENOENT;
2962 
2963 	path_get(&path);
2964 
2965 	fd = get_unused_fd_flags(O_CLOEXEC);
2966 	if (fd < 0)
2967 		goto out;
2968 
2969 	f = dentry_open(&path, O_PATH, current_cred());
2970 	if (IS_ERR(f)) {
2971 		put_unused_fd(fd);
2972 		fd = PTR_ERR(f);
2973 		goto out;
2974 	}
2975 
2976 	fd_install(fd, f);
2977 out:
2978 	path_put(&path);
2979 
2980 	return fd;
2981 }
2982 
2983 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2984 {
2985 	struct sock *sk = sock->sk;
2986 	long amount = 0;
2987 	int err;
2988 
2989 	switch (cmd) {
2990 	case SIOCOUTQ:
2991 		amount = unix_outq_len(sk);
2992 		err = put_user(amount, (int __user *)arg);
2993 		break;
2994 	case SIOCINQ:
2995 		amount = unix_inq_len(sk);
2996 		if (amount < 0)
2997 			err = amount;
2998 		else
2999 			err = put_user(amount, (int __user *)arg);
3000 		break;
3001 	case SIOCUNIXFILE:
3002 		err = unix_open_file(sk);
3003 		break;
3004 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3005 	case SIOCATMARK:
3006 		{
3007 			struct sk_buff *skb;
3008 			struct unix_sock *u = unix_sk(sk);
3009 			int answ = 0;
3010 
3011 			skb = skb_peek(&sk->sk_receive_queue);
3012 			if (skb && skb == u->oob_skb)
3013 				answ = 1;
3014 			err = put_user(answ, (int __user *)arg);
3015 		}
3016 		break;
3017 #endif
3018 	default:
3019 		err = -ENOIOCTLCMD;
3020 		break;
3021 	}
3022 	return err;
3023 }
3024 
3025 #ifdef CONFIG_COMPAT
3026 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3027 {
3028 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3029 }
3030 #endif
3031 
3032 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3033 {
3034 	struct sock *sk = sock->sk;
3035 	__poll_t mask;
3036 
3037 	sock_poll_wait(file, sock, wait);
3038 	mask = 0;
3039 
3040 	/* exceptional events? */
3041 	if (sk->sk_err)
3042 		mask |= EPOLLERR;
3043 	if (sk->sk_shutdown == SHUTDOWN_MASK)
3044 		mask |= EPOLLHUP;
3045 	if (sk->sk_shutdown & RCV_SHUTDOWN)
3046 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3047 
3048 	/* readable? */
3049 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3050 		mask |= EPOLLIN | EPOLLRDNORM;
3051 	if (sk_is_readable(sk))
3052 		mask |= EPOLLIN | EPOLLRDNORM;
3053 
3054 	/* Connection-based need to check for termination and startup */
3055 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3056 	    sk->sk_state == TCP_CLOSE)
3057 		mask |= EPOLLHUP;
3058 
3059 	/*
3060 	 * We set writable also when the other side has shut down the
3061 	 * connection. This prevents stuck sockets.
3062 	 */
3063 	if (unix_writable(sk))
3064 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3065 
3066 	return mask;
3067 }
3068 
3069 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3070 				    poll_table *wait)
3071 {
3072 	struct sock *sk = sock->sk, *other;
3073 	unsigned int writable;
3074 	__poll_t mask;
3075 
3076 	sock_poll_wait(file, sock, wait);
3077 	mask = 0;
3078 
3079 	/* exceptional events? */
3080 	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
3081 		mask |= EPOLLERR |
3082 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3083 
3084 	if (sk->sk_shutdown & RCV_SHUTDOWN)
3085 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3086 	if (sk->sk_shutdown == SHUTDOWN_MASK)
3087 		mask |= EPOLLHUP;
3088 
3089 	/* readable? */
3090 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3091 		mask |= EPOLLIN | EPOLLRDNORM;
3092 	if (sk_is_readable(sk))
3093 		mask |= EPOLLIN | EPOLLRDNORM;
3094 
3095 	/* Connection-based need to check for termination and startup */
3096 	if (sk->sk_type == SOCK_SEQPACKET) {
3097 		if (sk->sk_state == TCP_CLOSE)
3098 			mask |= EPOLLHUP;
3099 		/* connection hasn't started yet? */
3100 		if (sk->sk_state == TCP_SYN_SENT)
3101 			return mask;
3102 	}
3103 
3104 	/* No write status requested, avoid expensive OUT tests. */
3105 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3106 		return mask;
3107 
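	/* Writability also depends on the peer: if its receive queue is full,
	 * register on the peer's wait queue (unix_dgram_peer_wake_me) so this
	 * socket is woken once space becomes available.
	 */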
3108 	writable = unix_writable(sk);
3109 	if (writable) {
3110 		unix_state_lock(sk);
3111 
3112 		other = unix_peer(sk);
3113 		if (other && unix_peer(other) != sk &&
3114 		    unix_recvq_full_lockless(other) &&
3115 		    unix_dgram_peer_wake_me(sk, other))
3116 			writable = 0;
3117 
3118 		unix_state_unlock(sk);
3119 	}
3120 
3121 	if (writable)
3122 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3123 	else
3124 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3125 
3126 	return mask;
3127 }
3128 
3129 #ifdef CONFIG_PROC_FS
3130 
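/* The seq_file position encodes a hash bucket in its upper bits and a
 * 1-based offset within that bucket in its lower bits.
 */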
3131 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3132 
3133 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3134 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
3135 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3136 
3137 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3138 {
3139 	unsigned long offset = get_offset(*pos);
3140 	unsigned long bucket = get_bucket(*pos);
3141 	struct sock *sk;
3142 	unsigned long count = 0;
3143 
3144 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3145 		if (sock_net(sk) != seq_file_net(seq))
3146 			continue;
3147 		if (++count == offset)
3148 			break;
3149 	}
3150 
3151 	return sk;
3152 }
3153 
3154 static struct sock *unix_next_socket(struct seq_file *seq,
3155 				     struct sock *sk,
3156 				     loff_t *pos)
3157 {
3158 	unsigned long bucket;
3159 
3160 	while (sk > (struct sock *)SEQ_START_TOKEN) {
3161 		sk = sk_next(sk);
3162 		if (!sk)
3163 			goto next_bucket;
3164 		if (sock_net(sk) == seq_file_net(seq))
3165 			return sk;
3166 	}
3167 
3168 	do {
3169 		sk = unix_from_bucket(seq, pos);
3170 		if (sk)
3171 			return sk;
3172 
3173 next_bucket:
3174 		bucket = get_bucket(*pos) + 1;
3175 		*pos = set_bucket_offset(bucket, 1);
3176 	} while (bucket < ARRAY_SIZE(unix_socket_table));
3177 
3178 	return NULL;
3179 }
3180 
3181 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3182 	__acquires(unix_table_lock)
3183 {
3184 	spin_lock(&unix_table_lock);
3185 
3186 	if (!*pos)
3187 		return SEQ_START_TOKEN;
3188 
3189 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3190 		return NULL;
3191 
3192 	return unix_next_socket(seq, NULL, pos);
3193 }
3194 
3195 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3196 {
3197 	++*pos;
3198 	return unix_next_socket(seq, v, pos);
3199 }
3200 
3201 static void unix_seq_stop(struct seq_file *seq, void *v)
3202 	__releases(unix_table_lock)
3203 {
3204 	spin_unlock(&unix_table_lock);
3205 }
3206 
3207 static int unix_seq_show(struct seq_file *seq, void *v)
3208 {
3209 
3210 	if (v == SEQ_START_TOKEN)
3211 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3212 			 "Inode Path\n");
3213 	else {
3214 		struct sock *s = v;
3215 		struct unix_sock *u = unix_sk(s);
3216 		unix_state_lock(s);
3217 
3218 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3219 			s,
3220 			refcount_read(&s->sk_refcnt),
3221 			0,
3222 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3223 			s->sk_type,
3224 			s->sk_socket ?
3225 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3226 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3227 			sock_i_ino(s));
3228 
3229 		if (u->addr) {	/* under unix_table_lock here */
3230 			int i, len;
3231 			seq_putc(seq, ' ');
3232 
3233 			i = 0;
3234 			len = u->addr->len - sizeof(short);
3235 			if (!UNIX_ABSTRACT(s))
3236 				len--;
3237 			else {
3238 				seq_putc(seq, '@');
3239 				i++;
3240 			}
3241 			for ( ; i < len; i++)
3242 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3243 					 '@');
3244 		}
3245 		unix_state_unlock(s);
3246 		seq_putc(seq, '\n');
3247 	}
3248 
3249 	return 0;
3250 }
3251 
3252 static const struct seq_operations unix_seq_ops = {
3253 	.start  = unix_seq_start,
3254 	.next   = unix_seq_next,
3255 	.stop   = unix_seq_stop,
3256 	.show   = unix_seq_show,
3257 };
3258 
3259 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3260 struct bpf_iter__unix {
3261 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3262 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3263 	uid_t uid __aligned(8);
3264 };
3265 
3266 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3267 			      struct unix_sock *unix_sk, uid_t uid)
3268 {
3269 	struct bpf_iter__unix ctx;
3270 
3271 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3272 	ctx.meta = meta;
3273 	ctx.unix_sk = unix_sk;
3274 	ctx.uid = uid;
3275 	return bpf_iter_run_prog(prog, &ctx);
3276 }
3277 
3278 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3279 {
3280 	struct bpf_iter_meta meta;
3281 	struct bpf_prog *prog;
3282 	struct sock *sk = v;
3283 	uid_t uid;
3284 
3285 	if (v == SEQ_START_TOKEN)
3286 		return 0;
3287 
3288 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3289 	meta.seq = seq;
3290 	prog = bpf_iter_get_info(&meta, false);
3291 	return unix_prog_seq_show(prog, &meta, v, uid);
3292 }
3293 
3294 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3295 {
3296 	struct bpf_iter_meta meta;
3297 	struct bpf_prog *prog;
3298 
3299 	if (!v) {
3300 		meta.seq = seq;
3301 		prog = bpf_iter_get_info(&meta, true);
3302 		if (prog)
3303 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3304 	}
3305 
3306 	unix_seq_stop(seq, v);
3307 }
3308 
3309 static const struct seq_operations bpf_iter_unix_seq_ops = {
3310 	.start	= unix_seq_start,
3311 	.next	= unix_seq_next,
3312 	.stop	= bpf_iter_unix_seq_stop,
3313 	.show	= bpf_iter_unix_seq_show,
3314 };
3315 #endif
3316 #endif
3317 
3318 static const struct net_proto_family unix_family_ops = {
3319 	.family = PF_UNIX,
3320 	.create = unix_create,
3321 	.owner	= THIS_MODULE,
3322 };
3323 
3324 
3325 static int __net_init unix_net_init(struct net *net)
3326 {
3327 	int error = -ENOMEM;
3328 
3329 	net->unx.sysctl_max_dgram_qlen = 10;
3330 	if (unix_sysctl_register(net))
3331 		goto out;
3332 
3333 #ifdef CONFIG_PROC_FS
3334 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3335 			sizeof(struct seq_net_private))) {
3336 		unix_sysctl_unregister(net);
3337 		goto out;
3338 	}
3339 #endif
3340 	error = 0;
3341 out:
3342 	return error;
3343 }
3344 
3345 static void __net_exit unix_net_exit(struct net *net)
3346 {
3347 	unix_sysctl_unregister(net);
3348 	remove_proc_entry("unix", net->proc_net);
3349 }
3350 
3351 static struct pernet_operations unix_net_ops = {
3352 	.init = unix_net_init,
3353 	.exit = unix_net_exit,
3354 };
3355 
3356 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3357 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3358 		     struct unix_sock *unix_sk, uid_t uid)
3359 
3360 static const struct bpf_iter_seq_info unix_seq_info = {
3361 	.seq_ops		= &bpf_iter_unix_seq_ops,
3362 	.init_seq_private	= bpf_iter_init_seq_net,
3363 	.fini_seq_private	= bpf_iter_fini_seq_net,
3364 	.seq_priv_size		= sizeof(struct seq_net_private),
3365 };
3366 
3367 static struct bpf_iter_reg unix_reg_info = {
3368 	.target			= "unix",
3369 	.ctx_arg_info_size	= 1,
3370 	.ctx_arg_info		= {
3371 		{ offsetof(struct bpf_iter__unix, unix_sk),
3372 		  PTR_TO_BTF_ID_OR_NULL },
3373 	},
3374 	.seq_info		= &unix_seq_info,
3375 };
3376 
3377 static void __init bpf_iter_register(void)
3378 {
3379 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3380 	if (bpf_iter_reg_target(&unix_reg_info))
3381 		pr_warn("Warning: could not register bpf iterator unix\n");
3382 }
3383 #endif
3384 
3385 static int __init af_unix_init(void)
3386 {
3387 	int rc = -1;
3388 
3389 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3390 
3391 	rc = proto_register(&unix_dgram_proto, 1);
3392 	if (rc != 0) {
3393 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3394 		goto out;
3395 	}
3396 
3397 	rc = proto_register(&unix_stream_proto, 1);
3398 	if (rc != 0) {
3399 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3400 		goto out;
3401 	}
3402 
3403 	sock_register(&unix_family_ops);
3404 	register_pernet_subsys(&unix_net_ops);
3405 	unix_bpf_build_proto();
3406 
3407 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3408 	bpf_iter_register();
3409 #endif
3410 
3411 out:
3412 	return rc;
3413 }
3414 
3415 static void __exit af_unix_exit(void)
3416 {
3417 	sock_unregister(PF_UNIX);
3418 	proto_unregister(&unix_dgram_proto);
3419 	proto_unregister(&unix_stream_proto);
3420 	unregister_pernet_subsys(&unix_net_ops);
3421 }
3422 
3423 /* Earlier than device_initcall() so that other drivers invoking
3424  * request_module() don't end up in a loop when modprobe tries
3425  * to use a UNIX socket. But later than subsys_initcall() because
3426  * we depend on stuff initialised there. */
3427 fs_initcall(af_unix_init);
3428 module_exit(af_unix_exit);
3429 
3430 MODULE_LICENSE("GPL");
3431 MODULE_ALIAS_NETPROTO(PF_UNIX);
3432