xref: /linux/net/unix/af_unix.c (revision 6aac2aa2dfae38b60f22c3dfe4103ceefbe2d761)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko Eißfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge amount
34  *					of socks hashed (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  starting with a zero byte, so that this name space does not intersect
75  *		  with BSD names.
76  */
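
/* A minimal userspace sketch of the "abstract" binding described above
 * (illustrative only; the function name, the name "\0example" and the lack
 * of error handling are arbitrary).  The name starts with a zero byte,
 * occupies exactly the length passed to bind(), and creates no filesystem
 * object:
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract(void)
 *	{
 *		struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *		int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *		memcpy(addr.sun_path, "\0example", 8);
 *		return bind(fd, (struct sockaddr *)&addr,
 *			    offsetof(struct sockaddr_un, sun_path) + 8);
 *	}
 */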
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/bpf-cgroup.h>
81 #include <linux/btf_ids.h>
82 #include <linux/dcache.h>
83 #include <linux/errno.h>
84 #include <linux/fcntl.h>
85 #include <linux/file.h>
86 #include <linux/filter.h>
87 #include <linux/fs.h>
88 #include <linux/fs_struct.h>
89 #include <linux/init.h>
90 #include <linux/kernel.h>
91 #include <linux/mount.h>
92 #include <linux/namei.h>
93 #include <linux/net.h>
94 #include <linux/pidfs.h>
95 #include <linux/poll.h>
96 #include <linux/proc_fs.h>
97 #include <linux/sched/signal.h>
98 #include <linux/security.h>
99 #include <linux/seq_file.h>
100 #include <linux/skbuff.h>
101 #include <linux/slab.h>
102 #include <linux/socket.h>
103 #include <linux/splice.h>
104 #include <linux/string.h>
105 #include <linux/uaccess.h>
106 #include <net/af_unix.h>
107 #include <net/net_namespace.h>
108 #include <net/scm.h>
109 #include <net/tcp_states.h>
110 #include <uapi/linux/sockios.h>
111 #include <uapi/linux/termios.h>
112 
113 #include "af_unix.h"
114 
115 static atomic_long_t unix_nr_socks;
116 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
117 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
118 
119 /* SMP locking strategy:
120  *    hash table is protected with spinlock.
121  *    each socket state is protected by separate spinlock.
122  */
123 #ifdef CONFIG_PROVE_LOCKING
124 #define cmp_ptr(l, r)	(((l) > (r)) - ((l) < (r)))
125 
126 static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
127 				  const struct lockdep_map *b)
128 {
129 	return cmp_ptr(a, b);
130 }
131 
132 static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
133 				  const struct lockdep_map *_b)
134 {
135 	const struct unix_sock *a, *b;
136 
137 	a = container_of(_a, struct unix_sock, lock.dep_map);
138 	b = container_of(_b, struct unix_sock, lock.dep_map);
139 
140 	if (a->sk.sk_state == TCP_LISTEN) {
141 		/* unix_stream_connect(): Before the 2nd unix_state_lock(),
142 		 *
143 		 *   1. a is TCP_LISTEN.
144 		 *   2. b is not a.
145 		 *   3. concurrent connect(b -> a) must fail.
146 		 *
147 		 * Except for 2. & 3., the b's state can be any possible
148 		 * value due to concurrent connect() or listen().
149 		 *
150 		 * 2. is detected in debug_spin_lock_before(), and 3. cannot
151 		 * be expressed as lock_cmp_fn.
152 		 */
153 		switch (b->sk.sk_state) {
154 		case TCP_CLOSE:
155 		case TCP_ESTABLISHED:
156 		case TCP_LISTEN:
157 			return -1;
158 		default:
159 			/* Invalid case. */
160 			return 0;
161 		}
162 	}
163 
164 	/* Should never happen.  Just to be symmetric. */
165 	if (b->sk.sk_state == TCP_LISTEN) {
166 		switch (a->sk.sk_state) {
167 		case TCP_CLOSE:
168 		case TCP_ESTABLISHED:
169 			return 1;
170 		default:
171 			return 0;
172 		}
173 	}
174 
175 	/* unix_state_double_lock(): ascending address order. */
176 	return cmp_ptr(a, b);
177 }
178 
179 static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
180 				  const struct lockdep_map *_b)
181 {
182 	const struct sock *a, *b;
183 
184 	a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
185 	b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
186 
187 	/* unix_collect_skb(): listener -> embryo order. */
188 	if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
189 		return -1;
190 
191 	/* Should never happen.  Just to be symmetric. */
192 	if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
193 		return 1;
194 
195 	return 0;
196 }
197 #endif
198 
199 static unsigned int unix_unbound_hash(struct sock *sk)
200 {
201 	unsigned long hash = (unsigned long)sk;
202 
203 	hash ^= hash >> 16;
204 	hash ^= hash >> 8;
205 	hash ^= sk->sk_type;
206 
207 	return hash & UNIX_HASH_MOD;
208 }
209 
210 static unsigned int unix_bsd_hash(struct inode *i)
211 {
212 	return i->i_ino & UNIX_HASH_MOD;
213 }
214 
215 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
216 				       int addr_len, int type)
217 {
218 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
219 	unsigned int hash;
220 
221 	hash = (__force unsigned int)csum_fold(csum);
222 	hash ^= hash >> 8;
223 	hash ^= type;
224 
225 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
226 }
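
/* A note on the hash space layout, assuming UNIX_HASH_MOD has its usual
 * definition of UNIX_HASH_SIZE / 2 - 1: unbound and pathname (BSD) sockets
 * hash into slots [0, UNIX_HASH_MOD], while abstract sockets hash into
 * [UNIX_HASH_MOD + 1, UNIX_HASH_SIZE - 1], so the two name spaces never
 * share a bucket.  bsd_socket_buckets above is a separate table keyed by
 * inode, used for the lookup in unix_find_socket_byinode().
 */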
227 
228 static void unix_table_double_lock(struct net *net,
229 				   unsigned int hash1, unsigned int hash2)
230 {
231 	if (hash1 == hash2) {
232 		spin_lock(&net->unx.table.locks[hash1]);
233 		return;
234 	}
235 
236 	if (hash1 > hash2)
237 		swap(hash1, hash2);
238 
239 	spin_lock(&net->unx.table.locks[hash1]);
240 	spin_lock(&net->unx.table.locks[hash2]);
241 }
242 
243 static void unix_table_double_unlock(struct net *net,
244 				     unsigned int hash1, unsigned int hash2)
245 {
246 	if (hash1 == hash2) {
247 		spin_unlock(&net->unx.table.locks[hash1]);
248 		return;
249 	}
250 
251 	spin_unlock(&net->unx.table.locks[hash1]);
252 	spin_unlock(&net->unx.table.locks[hash2]);
253 }
254 
255 #ifdef CONFIG_SECURITY_NETWORK
256 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
257 {
258 	UNIXCB(skb).secid = scm->secid;
259 }
260 
261 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
262 {
263 	scm->secid = UNIXCB(skb).secid;
264 }
265 
266 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
267 {
268 	return (scm->secid == UNIXCB(skb).secid);
269 }
270 #else
271 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
272 { }
273 
274 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
275 { }
276 
277 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
278 {
279 	return true;
280 }
281 #endif /* CONFIG_SECURITY_NETWORK */
282 
283 static inline int unix_may_send(struct sock *sk, struct sock *osk)
284 {
285 	return !unix_peer(osk) || unix_peer(osk) == sk;
286 }
287 
288 static inline int unix_recvq_full_lockless(const struct sock *sk)
289 {
290 	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
291 }
292 
293 struct sock *unix_peer_get(struct sock *s)
294 {
295 	struct sock *peer;
296 
297 	unix_state_lock(s);
298 	peer = unix_peer(s);
299 	if (peer)
300 		sock_hold(peer);
301 	unix_state_unlock(s);
302 	return peer;
303 }
304 EXPORT_SYMBOL_GPL(unix_peer_get);
305 
306 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
307 					     int addr_len)
308 {
309 	struct unix_address *addr;
310 
311 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
312 	if (!addr)
313 		return NULL;
314 
315 	refcount_set(&addr->refcnt, 1);
316 	addr->len = addr_len;
317 	memcpy(addr->name, sunaddr, addr_len);
318 
319 	return addr;
320 }
321 
322 static inline void unix_release_addr(struct unix_address *addr)
323 {
324 	if (refcount_dec_and_test(&addr->refcnt))
325 		kfree(addr);
326 }
327 
328 /*
329  *	Check unix socket name:
330  *		- it should not be zero length.
331  *		- if it does not start with a zero byte, it should be NUL terminated (FS object).
332  *		- if it starts with a zero byte, it is an abstract name.
333  */
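
/* Concretely, for an address with sun_family == AF_UNIX:
 *	- addr_len == offsetof(struct sockaddr_un, sun_path) requests
 *	  autobind and is handled in unix_bind(), not here;
 *	- sun_path beginning "/tmp/sock" names a filesystem socket, and
 *	  unix_mkname_bsd() below accepts it with or without a trailing NUL;
 *	- sun_path beginning with a zero byte names an abstract socket,
 *	  where every byte up to addr_len is significant.
 */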
334 
335 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
336 {
337 	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
338 	    addr_len > sizeof(*sunaddr))
339 		return -EINVAL;
340 
341 	if (sunaddr->sun_family != AF_UNIX)
342 		return -EINVAL;
343 
344 	return 0;
345 }
346 
347 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
348 {
349 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
350 	short offset = offsetof(struct sockaddr_storage, __data);
351 
352 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
353 
354 	/* This may look like an off by one error but it is a bit more
355 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
356 	 * sun_path[108] doesn't as such exist.  However in kernel space
357 	 * we are guaranteed that it is a valid memory location in our
358 	 * kernel address buffer because syscall functions always pass
359 	 * a pointer of struct sockaddr_storage which has a bigger buffer
360 	 * than 108.  Also, we must terminate sun_path for strlen() in
361 	 * getname_kernel().
362 	 */
363 	addr->__data[addr_len - offset] = 0;
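	/* For example, sun_path = "/tmp/sock" passed with addr_len of
	 * offsetof(struct sockaddr_un, sun_path) + 9 (no trailing NUL)
	 * gets __data[9] zeroed here, and the function returns
	 * 9 + offset + 1 = 12, the same as if the caller had counted
	 * the NUL itself.
	 */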
364 
365 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
366 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
367 	 * know the actual buffer.
368 	 */
369 	return strlen(addr->__data) + offset + 1;
370 }
371 
372 static void __unix_remove_socket(struct sock *sk)
373 {
374 	sk_del_node_init(sk);
375 }
376 
377 static void __unix_insert_socket(struct net *net, struct sock *sk)
378 {
379 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
380 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
381 }
382 
383 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
384 				 struct unix_address *addr, unsigned int hash)
385 {
386 	__unix_remove_socket(sk);
387 	smp_store_release(&unix_sk(sk)->addr, addr);
388 
389 	sk->sk_hash = hash;
390 	__unix_insert_socket(net, sk);
391 }
392 
393 static void unix_remove_socket(struct net *net, struct sock *sk)
394 {
395 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
396 	__unix_remove_socket(sk);
397 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
398 }
399 
400 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
401 {
402 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
403 	__unix_insert_socket(net, sk);
404 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
405 }
406 
407 static void unix_insert_bsd_socket(struct sock *sk)
408 {
409 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
410 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
411 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
412 }
413 
414 static void unix_remove_bsd_socket(struct sock *sk)
415 {
416 	if (!hlist_unhashed(&sk->sk_bind_node)) {
417 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
418 		__sk_del_bind_node(sk);
419 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
420 
421 		sk_node_init(&sk->sk_bind_node);
422 	}
423 }
424 
425 static struct sock *__unix_find_socket_byname(struct net *net,
426 					      struct sockaddr_un *sunname,
427 					      int len, unsigned int hash)
428 {
429 	struct sock *s;
430 
431 	sk_for_each(s, &net->unx.table.buckets[hash]) {
432 		struct unix_sock *u = unix_sk(s);
433 
434 		if (u->addr->len == len &&
435 		    !memcmp(u->addr->name, sunname, len))
436 			return s;
437 	}
438 	return NULL;
439 }
440 
441 static inline struct sock *unix_find_socket_byname(struct net *net,
442 						   struct sockaddr_un *sunname,
443 						   int len, unsigned int hash)
444 {
445 	struct sock *s;
446 
447 	spin_lock(&net->unx.table.locks[hash]);
448 	s = __unix_find_socket_byname(net, sunname, len, hash);
449 	if (s)
450 		sock_hold(s);
451 	spin_unlock(&net->unx.table.locks[hash]);
452 	return s;
453 }
454 
455 static struct sock *unix_find_socket_byinode(struct inode *i)
456 {
457 	unsigned int hash = unix_bsd_hash(i);
458 	struct sock *s;
459 
460 	spin_lock(&bsd_socket_locks[hash]);
461 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
462 		struct dentry *dentry = unix_sk(s)->path.dentry;
463 
464 		if (dentry && d_backing_inode(dentry) == i) {
465 			sock_hold(s);
466 			spin_unlock(&bsd_socket_locks[hash]);
467 			return s;
468 		}
469 	}
470 	spin_unlock(&bsd_socket_locks[hash]);
471 	return NULL;
472 }
473 
474 /* Support code for asymmetrically connected dgram sockets
475  *
476  * If a datagram socket is connected to a socket not itself connected
477  * to the first socket (eg, /dev/log), clients may only enqueue more
478  * messages if the present receive queue of the server socket is not
479  * "too large". This means there's a second writeability condition
480  * poll and sendmsg need to test. The dgram recv code will do a wake
481  * up on the peer_wait wait queue of a socket upon reception of a
482  * datagram which needs to be propagated to sleeping would-be writers
483  * since these might not have sent anything so far. This can't be
484  * accomplished via poll_wait because the lifetime of the server
485  * socket might be less than that of its clients if these break their
486  * association with it or if the server socket is closed while clients
487  * are still connected to it and there's no way to inform "a polling
488  * implementation" that it should let go of a certain wait queue
489  *
490  * In order to propagate a wake up, a wait_queue_entry_t of the client
491  * socket is enqueued on the peer_wait queue of the server socket
492  * whose wake function does a wake_up on the ordinary client socket
493  * wait queue. This connection is established whenever a write (or
494  * poll for write) hit the flow control condition and broken when the
495  * association to the server socket is dissolved or after a wake up
496  * was relayed.
497  */
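
/* Roughly, the flow implemented by the helpers below is: a blocked writer
 * calls unix_dgram_peer_wake_me(sk, other), which uses
 * unix_dgram_peer_wake_connect() to queue sk's peer_wake entry on other's
 * peer_wait while other's receive queue is full.  When a datagram is
 * received on other, the wake up on other's peer_wait runs
 * unix_dgram_peer_wake_relay(), which drops the entry and wakes sk's own
 * wait queue so the sleeping poll()/sendmsg() caller can retry.
 */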
498 
499 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
500 				      void *key)
501 {
502 	struct unix_sock *u;
503 	wait_queue_head_t *u_sleep;
504 
505 	u = container_of(q, struct unix_sock, peer_wake);
506 
507 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
508 			    q);
509 	u->peer_wake.private = NULL;
510 
511 	/* relaying can only happen while the wq still exists */
512 	u_sleep = sk_sleep(&u->sk);
513 	if (u_sleep)
514 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
515 
516 	return 0;
517 }
518 
519 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
520 {
521 	struct unix_sock *u, *u_other;
522 	int rc;
523 
524 	u = unix_sk(sk);
525 	u_other = unix_sk(other);
526 	rc = 0;
527 	spin_lock(&u_other->peer_wait.lock);
528 
529 	if (!u->peer_wake.private) {
530 		u->peer_wake.private = other;
531 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
532 
533 		rc = 1;
534 	}
535 
536 	spin_unlock(&u_other->peer_wait.lock);
537 	return rc;
538 }
539 
540 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
541 					    struct sock *other)
542 {
543 	struct unix_sock *u, *u_other;
544 
545 	u = unix_sk(sk);
546 	u_other = unix_sk(other);
547 	spin_lock(&u_other->peer_wait.lock);
548 
549 	if (u->peer_wake.private == other) {
550 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
551 		u->peer_wake.private = NULL;
552 	}
553 
554 	spin_unlock(&u_other->peer_wait.lock);
555 }
556 
557 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
558 						   struct sock *other)
559 {
560 	unix_dgram_peer_wake_disconnect(sk, other);
561 	wake_up_interruptible_poll(sk_sleep(sk),
562 				   EPOLLOUT |
563 				   EPOLLWRNORM |
564 				   EPOLLWRBAND);
565 }
566 
567 /* preconditions:
568  *	- unix_peer(sk) == other
569  *	- association is stable
570  */
571 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
572 {
573 	int connected;
574 
575 	connected = unix_dgram_peer_wake_connect(sk, other);
576 
577 	/* If other is SOCK_DEAD, we want to make sure we signal
578 	 * POLLOUT, such that a subsequent write() can get a
579 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
580 	 * to other and it's full, we will hang waiting for POLLOUT.
581 	 */
582 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
583 		return 1;
584 
585 	if (connected)
586 		unix_dgram_peer_wake_disconnect(sk, other);
587 
588 	return 0;
589 }
590 
591 static int unix_writable(const struct sock *sk, unsigned char state)
592 {
593 	return state != TCP_LISTEN &&
594 		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
595 }
596 
597 static void unix_write_space(struct sock *sk)
598 {
599 	struct socket_wq *wq;
600 
601 	rcu_read_lock();
602 	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
603 		wq = rcu_dereference(sk->sk_wq);
604 		if (skwq_has_sleeper(wq))
605 			wake_up_interruptible_sync_poll(&wq->wait,
606 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
607 		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
608 	}
609 	rcu_read_unlock();
610 }
611 
612 /* When dgram socket disconnects (or changes its peer), we clear its receive
613  * queue of packets that arrived from the previous peer. First, this allows
614  * flow control based only on wmem_alloc; second, a sk connected to a peer
615  * may receive messages only from that peer. */
616 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
617 {
618 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
619 		skb_queue_purge_reason(&sk->sk_receive_queue,
620 				       SKB_DROP_REASON_UNIX_DISCONNECT);
621 
622 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
623 
624 		/* If one link of bidirectional dgram pipe is disconnected,
625 		 * we signal error. Messages are lost. Do not make this,
626 		 * when peer was not connected to us.
627 		 */
628 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
629 			WRITE_ONCE(other->sk_err, ECONNRESET);
630 			sk_error_report(other);
631 		}
632 	}
633 }
634 
635 static void unix_sock_destructor(struct sock *sk)
636 {
637 	struct unix_sock *u = unix_sk(sk);
638 
639 	skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);
640 
641 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
642 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
643 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
644 	if (!sock_flag(sk, SOCK_DEAD)) {
645 		pr_info("Attempt to release alive unix socket: %p\n", sk);
646 		return;
647 	}
648 
649 	if (u->addr)
650 		unix_release_addr(u->addr);
651 
652 	atomic_long_dec(&unix_nr_socks);
653 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
654 #ifdef UNIX_REFCNT_DEBUG
655 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
656 		atomic_long_read(&unix_nr_socks));
657 #endif
658 }
659 
660 static unsigned int unix_skb_len(const struct sk_buff *skb)
661 {
662 	return skb->len - UNIXCB(skb).consumed;
663 }
664 
665 static void unix_release_sock(struct sock *sk, int embrion)
666 {
667 	struct unix_sock *u = unix_sk(sk);
668 	struct sock *skpair;
669 	struct sk_buff *skb;
670 	struct path path;
671 	int state;
672 
673 	unix_remove_socket(sock_net(sk), sk);
674 	unix_remove_bsd_socket(sk);
675 
676 	/* Clear state */
677 	unix_state_lock(sk);
678 	sock_orphan(sk);
679 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
680 	path	     = u->path;
681 	u->path.dentry = NULL;
682 	u->path.mnt = NULL;
683 	state = sk->sk_state;
684 	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
685 
686 	skpair = unix_peer(sk);
687 	unix_peer(sk) = NULL;
688 
689 	unix_state_unlock(sk);
690 
691 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
692 	u->oob_skb = NULL;
693 #endif
694 
695 	wake_up_interruptible_all(&u->peer_wait);
696 
697 	if (skpair != NULL) {
698 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
699 			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
700 
701 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
702 			if (skb && !unix_skb_len(skb))
703 				skb = skb_peek_next(skb, &sk->sk_receive_queue);
704 #endif
705 			unix_state_lock(skpair);
706 			/* No more writes */
707 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
708 			if (skb || embrion)
709 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
710 			unix_state_unlock(skpair);
711 			skpair->sk_state_change(skpair);
712 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
713 		}
714 
715 		unix_dgram_peer_wake_disconnect(sk, skpair);
716 		sock_put(skpair); /* It may now die */
717 	}
718 
719 	/* Try to flush out this socket. Throw out buffers at least */
720 
721 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
722 		if (state == TCP_LISTEN)
723 			unix_release_sock(skb->sk, 1);
724 
725 		/* passed fds are erased in the kfree_skb hook */
726 		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
727 	}
728 
729 	if (path.dentry)
730 		path_put(&path);
731 
732 	sock_put(sk);
733 
734 	/* ---- Socket is dead now and most probably destroyed ---- */
735 
736 	unix_schedule_gc(NULL);
737 }
738 
739 struct unix_peercred {
740 	struct pid *peer_pid;
741 	const struct cred *peer_cred;
742 };
743 
744 static inline int prepare_peercred(struct unix_peercred *peercred)
745 {
746 	struct pid *pid;
747 	int err;
748 
749 	pid = task_tgid(current);
750 	err = pidfs_register_pid(pid);
751 	if (likely(!err)) {
752 		peercred->peer_pid = get_pid(pid);
753 		peercred->peer_cred = get_current_cred();
754 	}
755 	return err;
756 }
757 
758 static void drop_peercred(struct unix_peercred *peercred)
759 {
760 	const struct cred *cred = NULL;
761 	struct pid *pid = NULL;
762 
763 	might_sleep();
764 
765 	swap(peercred->peer_pid, pid);
766 	swap(peercred->peer_cred, cred);
767 
768 	put_pid(pid);
769 	put_cred(cred);
770 }
771 
772 static inline void init_peercred(struct sock *sk,
773 				 const struct unix_peercred *peercred)
774 {
775 	sk->sk_peer_pid = peercred->peer_pid;
776 	sk->sk_peer_cred = peercred->peer_cred;
777 }
778 
779 static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
780 {
781 	const struct cred *old_cred;
782 	struct pid *old_pid;
783 
784 	spin_lock(&sk->sk_peer_lock);
785 	old_pid = sk->sk_peer_pid;
786 	old_cred = sk->sk_peer_cred;
787 	init_peercred(sk, peercred);
788 	spin_unlock(&sk->sk_peer_lock);
789 
790 	peercred->peer_pid = old_pid;
791 	peercred->peer_cred = old_cred;
792 }
793 
794 static void copy_peercred(struct sock *sk, struct sock *peersk)
795 {
796 	lockdep_assert_held(&unix_sk(peersk)->lock);
797 
798 	spin_lock(&sk->sk_peer_lock);
799 	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
800 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
801 	spin_unlock(&sk->sk_peer_lock);
802 }
803 
804 static bool unix_may_passcred(const struct sock *sk)
805 {
806 	return sk->sk_scm_credentials || sk->sk_scm_pidfd;
807 }
808 
809 static int unix_listen(struct socket *sock, int backlog)
810 {
811 	int err;
812 	struct sock *sk = sock->sk;
813 	struct unix_sock *u = unix_sk(sk);
814 	struct unix_peercred peercred = {};
815 
816 	err = -EOPNOTSUPP;
817 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
818 		goto out;	/* Only stream/seqpacket sockets accept */
819 	err = -EINVAL;
820 	if (!READ_ONCE(u->addr))
821 		goto out;	/* No listens on an unbound socket */
822 	err = prepare_peercred(&peercred);
823 	if (err)
824 		goto out;
825 	unix_state_lock(sk);
826 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
827 		goto out_unlock;
828 	if (backlog > sk->sk_max_ack_backlog)
829 		wake_up_interruptible_all(&u->peer_wait);
830 	sk->sk_max_ack_backlog	= backlog;
831 	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
832 
833 	/* set credentials so connect can copy them */
834 	update_peercred(sk, &peercred);
835 	err = 0;
836 
837 out_unlock:
838 	unix_state_unlock(sk);
839 	drop_peercred(&peercred);
840 out:
841 	return err;
842 }
843 
844 static int unix_release(struct socket *);
845 static int unix_bind(struct socket *, struct sockaddr_unsized *, int);
846 static int unix_stream_connect(struct socket *, struct sockaddr_unsized *,
847 			       int addr_len, int flags);
848 static int unix_socketpair(struct socket *, struct socket *);
849 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
850 static int unix_getname(struct socket *, struct sockaddr *, int);
851 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
852 static __poll_t unix_dgram_poll(struct file *, struct socket *,
853 				    poll_table *);
854 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
855 #ifdef CONFIG_COMPAT
856 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
857 #endif
858 static int unix_shutdown(struct socket *, int);
859 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
860 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
861 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
862 				       struct pipe_inode_info *, size_t size,
863 				       unsigned int flags);
864 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
865 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
866 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
867 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
868 static int unix_dgram_connect(struct socket *, struct sockaddr_unsized *,
869 			      int, int);
870 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
871 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
872 				  int);
873 
874 #ifdef CONFIG_PROC_FS
875 static int unix_count_nr_fds(struct sock *sk)
876 {
877 	struct sk_buff *skb;
878 	struct unix_sock *u;
879 	int nr_fds = 0;
880 
881 	spin_lock(&sk->sk_receive_queue.lock);
882 	skb = skb_peek(&sk->sk_receive_queue);
883 	while (skb) {
884 		u = unix_sk(skb->sk);
885 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
886 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
887 	}
888 	spin_unlock(&sk->sk_receive_queue.lock);
889 
890 	return nr_fds;
891 }
892 
893 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
894 {
895 	struct sock *sk = sock->sk;
896 	unsigned char s_state;
897 	struct unix_sock *u;
898 	int nr_fds = 0;
899 
900 	if (sk) {
901 		s_state = READ_ONCE(sk->sk_state);
902 		u = unix_sk(sk);
903 
904 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
905 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
906 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
907 		 */
908 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
909 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
910 		else if (s_state == TCP_LISTEN)
911 			nr_fds = unix_count_nr_fds(sk);
912 
913 		seq_printf(m, "scm_fds: %u\n", nr_fds);
914 	}
915 }
916 #else
917 #define unix_show_fdinfo NULL
918 #endif
919 
920 static bool unix_custom_sockopt(int optname)
921 {
922 	switch (optname) {
923 	case SO_INQ:
924 		return true;
925 	default:
926 		return false;
927 	}
928 }
929 
930 static int unix_setsockopt(struct socket *sock, int level, int optname,
931 			   sockptr_t optval, unsigned int optlen)
932 {
933 	struct unix_sock *u = unix_sk(sock->sk);
934 	struct sock *sk = sock->sk;
935 	int val;
936 
937 	if (level != SOL_SOCKET)
938 		return -EOPNOTSUPP;
939 
940 	if (!unix_custom_sockopt(optname))
941 		return sock_setsockopt(sock, level, optname, optval, optlen);
942 
943 	if (optlen != sizeof(int))
944 		return -EINVAL;
945 
946 	if (copy_from_sockptr(&val, optval, sizeof(val)))
947 		return -EFAULT;
948 
949 	switch (optname) {
950 	case SO_INQ:
951 		if (sk->sk_type != SOCK_STREAM)
952 			return -EINVAL;
953 
954 		if (val > 1 || val < 0)
955 			return -EINVAL;
956 
957 		WRITE_ONCE(u->recvmsg_inq, val);
958 		break;
959 	default:
960 		return -ENOPROTOOPT;
961 	}
962 
963 	return 0;
964 }
965 
966 static const struct proto_ops unix_stream_ops = {
967 	.family =	PF_UNIX,
968 	.owner =	THIS_MODULE,
969 	.release =	unix_release,
970 	.bind =		unix_bind,
971 	.connect =	unix_stream_connect,
972 	.socketpair =	unix_socketpair,
973 	.accept =	unix_accept,
974 	.getname =	unix_getname,
975 	.poll =		unix_poll,
976 	.ioctl =	unix_ioctl,
977 #ifdef CONFIG_COMPAT
978 	.compat_ioctl =	unix_compat_ioctl,
979 #endif
980 	.listen =	unix_listen,
981 	.shutdown =	unix_shutdown,
982 	.setsockopt =	unix_setsockopt,
983 	.sendmsg =	unix_stream_sendmsg,
984 	.recvmsg =	unix_stream_recvmsg,
985 	.read_skb =	unix_stream_read_skb,
986 	.mmap =		sock_no_mmap,
987 	.splice_read =	unix_stream_splice_read,
988 	.set_peek_off =	sk_set_peek_off,
989 	.show_fdinfo =	unix_show_fdinfo,
990 };
991 
992 static const struct proto_ops unix_dgram_ops = {
993 	.family =	PF_UNIX,
994 	.owner =	THIS_MODULE,
995 	.release =	unix_release,
996 	.bind =		unix_bind,
997 	.connect =	unix_dgram_connect,
998 	.socketpair =	unix_socketpair,
999 	.accept =	sock_no_accept,
1000 	.getname =	unix_getname,
1001 	.poll =		unix_dgram_poll,
1002 	.ioctl =	unix_ioctl,
1003 #ifdef CONFIG_COMPAT
1004 	.compat_ioctl =	unix_compat_ioctl,
1005 #endif
1006 	.listen =	sock_no_listen,
1007 	.shutdown =	unix_shutdown,
1008 	.sendmsg =	unix_dgram_sendmsg,
1009 	.read_skb =	unix_read_skb,
1010 	.recvmsg =	unix_dgram_recvmsg,
1011 	.mmap =		sock_no_mmap,
1012 	.set_peek_off =	sk_set_peek_off,
1013 	.show_fdinfo =	unix_show_fdinfo,
1014 };
1015 
1016 static const struct proto_ops unix_seqpacket_ops = {
1017 	.family =	PF_UNIX,
1018 	.owner =	THIS_MODULE,
1019 	.release =	unix_release,
1020 	.bind =		unix_bind,
1021 	.connect =	unix_stream_connect,
1022 	.socketpair =	unix_socketpair,
1023 	.accept =	unix_accept,
1024 	.getname =	unix_getname,
1025 	.poll =		unix_dgram_poll,
1026 	.ioctl =	unix_ioctl,
1027 #ifdef CONFIG_COMPAT
1028 	.compat_ioctl =	unix_compat_ioctl,
1029 #endif
1030 	.listen =	unix_listen,
1031 	.shutdown =	unix_shutdown,
1032 	.sendmsg =	unix_seqpacket_sendmsg,
1033 	.recvmsg =	unix_seqpacket_recvmsg,
1034 	.mmap =		sock_no_mmap,
1035 	.set_peek_off =	sk_set_peek_off,
1036 	.show_fdinfo =	unix_show_fdinfo,
1037 };
1038 
1039 static void unix_close(struct sock *sk, long timeout)
1040 {
1041 	/* Nothing to do here, unix socket does not need a ->close().
1042 	 * This is merely for sockmap.
1043 	 */
1044 }
1045 
1046 static bool unix_bpf_bypass_getsockopt(int level, int optname)
1047 {
1048 	if (level == SOL_SOCKET) {
1049 		switch (optname) {
1050 		case SO_PEERPIDFD:
1051 			return true;
1052 		default:
1053 			return false;
1054 		}
1055 	}
1056 
1057 	return false;
1058 }
1059 
1060 struct proto unix_dgram_proto = {
1061 	.name			= "UNIX",
1062 	.owner			= THIS_MODULE,
1063 	.obj_size		= sizeof(struct unix_sock),
1064 	.close			= unix_close,
1065 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1066 #ifdef CONFIG_BPF_SYSCALL
1067 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
1068 #endif
1069 };
1070 
1071 struct proto unix_stream_proto = {
1072 	.name			= "UNIX-STREAM",
1073 	.owner			= THIS_MODULE,
1074 	.obj_size		= sizeof(struct unix_sock),
1075 	.close			= unix_close,
1076 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1077 #ifdef CONFIG_BPF_SYSCALL
1078 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
1079 #endif
1080 };
1081 
1082 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1083 {
1084 	struct unix_sock *u;
1085 	struct sock *sk;
1086 	int err;
1087 
1088 	atomic_long_inc(&unix_nr_socks);
1089 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
1090 		err = -ENFILE;
1091 		goto err;
1092 	}
1093 
1094 	if (type == SOCK_STREAM)
1095 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
1096 	else /* dgram and seqpacket */
1097 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
1098 
1099 	if (!sk) {
1100 		err = -ENOMEM;
1101 		goto err;
1102 	}
1103 
1104 	sock_init_data(sock, sk);
1105 
1106 	sk->sk_scm_rights	= 1;
1107 	sk->sk_hash		= unix_unbound_hash(sk);
1108 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
1109 	sk->sk_write_space	= unix_write_space;
1110 	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
1111 	sk->sk_destruct		= unix_sock_destructor;
1112 	lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
1113 
1114 	u = unix_sk(sk);
1115 	u->listener = NULL;
1116 	u->vertex = NULL;
1117 	u->path.dentry = NULL;
1118 	u->path.mnt = NULL;
1119 	spin_lock_init(&u->lock);
1120 	lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
1121 	mutex_init(&u->iolock); /* single task reading lock */
1122 	mutex_init(&u->bindlock); /* single task binding lock */
1123 	init_waitqueue_head(&u->peer_wait);
1124 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1125 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1126 	unix_insert_unbound_socket(net, sk);
1127 
1128 	sock_prot_inuse_add(net, sk->sk_prot, 1);
1129 
1130 	return sk;
1131 
1132 err:
1133 	atomic_long_dec(&unix_nr_socks);
1134 	return ERR_PTR(err);
1135 }
1136 
1137 static int unix_create(struct net *net, struct socket *sock, int protocol,
1138 		       int kern)
1139 {
1140 	struct sock *sk;
1141 
1142 	if (protocol && protocol != PF_UNIX)
1143 		return -EPROTONOSUPPORT;
1144 
1145 	sock->state = SS_UNCONNECTED;
1146 
1147 	switch (sock->type) {
1148 	case SOCK_STREAM:
1149 		set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
1150 		sock->ops = &unix_stream_ops;
1151 		break;
1152 		/*
1153 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1154 		 *	nothing uses it.
1155 		 */
1156 	case SOCK_RAW:
1157 		sock->type = SOCK_DGRAM;
1158 		fallthrough;
1159 	case SOCK_DGRAM:
1160 		sock->ops = &unix_dgram_ops;
1161 		break;
1162 	case SOCK_SEQPACKET:
1163 		sock->ops = &unix_seqpacket_ops;
1164 		break;
1165 	default:
1166 		return -ESOCKTNOSUPPORT;
1167 	}
1168 
1169 	sk = unix_create1(net, sock, kern, sock->type);
1170 	if (IS_ERR(sk))
1171 		return PTR_ERR(sk);
1172 
1173 	return 0;
1174 }
1175 
1176 static int unix_release(struct socket *sock)
1177 {
1178 	struct sock *sk = sock->sk;
1179 
1180 	if (!sk)
1181 		return 0;
1182 
1183 	sk->sk_prot->close(sk, 0);
1184 	unix_release_sock(sk, 0);
1185 	sock->sk = NULL;
1186 
1187 	return 0;
1188 }
1189 
1190 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1191 				  int type, int flags)
1192 {
1193 	struct inode *inode;
1194 	struct path path;
1195 	struct sock *sk;
1196 	int err;
1197 
1198 	unix_mkname_bsd(sunaddr, addr_len);
1199 
1200 	if (flags & SOCK_COREDUMP) {
1201 		const struct cred *cred;
1202 		struct cred *kcred;
1203 		struct path root;
1204 
1205 		kcred = prepare_kernel_cred(&init_task);
1206 		if (!kcred) {
1207 			err = -ENOMEM;
1208 			goto fail;
1209 		}
1210 
1211 		task_lock(&init_task);
1212 		get_fs_root(init_task.fs, &root);
1213 		task_unlock(&init_task);
1214 
1215 		cred = override_creds(kcred);
1216 		err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
1217 				      LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
1218 				      LOOKUP_NO_MAGICLINKS, &path);
1219 		put_cred(revert_creds(cred));
1220 		path_put(&root);
1221 		if (err)
1222 			goto fail;
1223 	} else {
1224 		err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1225 		if (err)
1226 			goto fail;
1227 
1228 		err = path_permission(&path, MAY_WRITE);
1229 		if (err)
1230 			goto path_put;
1231 	}
1232 
1233 	err = -ECONNREFUSED;
1234 	inode = d_backing_inode(path.dentry);
1235 	if (!S_ISSOCK(inode->i_mode))
1236 		goto path_put;
1237 
1238 	sk = unix_find_socket_byinode(inode);
1239 	if (!sk)
1240 		goto path_put;
1241 
1242 	err = -EPROTOTYPE;
1243 	if (sk->sk_type == type)
1244 		touch_atime(&path);
1245 	else
1246 		goto sock_put;
1247 
1248 	path_put(&path);
1249 
1250 	return sk;
1251 
1252 sock_put:
1253 	sock_put(sk);
1254 path_put:
1255 	path_put(&path);
1256 fail:
1257 	return ERR_PTR(err);
1258 }
1259 
1260 static struct sock *unix_find_abstract(struct net *net,
1261 				       struct sockaddr_un *sunaddr,
1262 				       int addr_len, int type)
1263 {
1264 	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1265 	struct dentry *dentry;
1266 	struct sock *sk;
1267 
1268 	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1269 	if (!sk)
1270 		return ERR_PTR(-ECONNREFUSED);
1271 
1272 	dentry = unix_sk(sk)->path.dentry;
1273 	if (dentry)
1274 		touch_atime(&unix_sk(sk)->path);
1275 
1276 	return sk;
1277 }
1278 
1279 static struct sock *unix_find_other(struct net *net,
1280 				    struct sockaddr_un *sunaddr,
1281 				    int addr_len, int type, int flags)
1282 {
1283 	struct sock *sk;
1284 
1285 	if (sunaddr->sun_path[0])
1286 		sk = unix_find_bsd(sunaddr, addr_len, type, flags);
1287 	else
1288 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1289 
1290 	return sk;
1291 }
1292 
1293 static int unix_autobind(struct sock *sk)
1294 {
1295 	struct unix_sock *u = unix_sk(sk);
1296 	unsigned int new_hash, old_hash;
1297 	struct net *net = sock_net(sk);
1298 	struct unix_address *addr;
1299 	u32 lastnum, ordernum;
1300 	int err;
1301 
1302 	err = mutex_lock_interruptible(&u->bindlock);
1303 	if (err)
1304 		return err;
1305 
1306 	if (u->addr)
1307 		goto out;
1308 
1309 	err = -ENOMEM;
1310 	addr = kzalloc(sizeof(*addr) +
1311 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1312 	if (!addr)
1313 		goto out;
1314 
1315 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1316 	addr->name->sun_family = AF_UNIX;
1317 	refcount_set(&addr->refcnt, 1);
1318 
1319 	old_hash = sk->sk_hash;
1320 	ordernum = get_random_u32();
1321 	lastnum = ordernum & 0xFFFFF;
1322 retry:
1323 	ordernum = (ordernum + 1) & 0xFFFFF;
1324 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1325 
1326 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1327 	unix_table_double_lock(net, old_hash, new_hash);
1328 
1329 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1330 		unix_table_double_unlock(net, old_hash, new_hash);
1331 
1332 		/* __unix_find_socket_byname() may take long time if many names
1333 		 * are already in use.
1334 		 */
1335 		cond_resched();
1336 
1337 		if (ordernum == lastnum) {
1338 			/* Give up if all names seem to be in use. */
1339 			err = -ENOSPC;
1340 			unix_release_addr(addr);
1341 			goto out;
1342 		}
1343 
1344 		goto retry;
1345 	}
1346 
1347 	__unix_set_addr_hash(net, sk, addr, new_hash);
1348 	unix_table_double_unlock(net, old_hash, new_hash);
1349 	err = 0;
1350 
1351 out:	mutex_unlock(&u->bindlock);
1352 	return err;
1353 }
1354 
1355 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1356 			 int addr_len)
1357 {
1358 	umode_t mode = S_IFSOCK |
1359 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1360 	struct unix_sock *u = unix_sk(sk);
1361 	unsigned int new_hash, old_hash;
1362 	struct net *net = sock_net(sk);
1363 	struct mnt_idmap *idmap;
1364 	struct unix_address *addr;
1365 	struct dentry *dentry;
1366 	struct path parent;
1367 	int err;
1368 
1369 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1370 	addr = unix_create_addr(sunaddr, addr_len);
1371 	if (!addr)
1372 		return -ENOMEM;
1373 
1374 	/*
1375 	 * Get the parent directory, calculate the hash for last
1376 	 * component.
1377 	 */
1378 	dentry = start_creating_path(AT_FDCWD, addr->name->sun_path, &parent, 0);
1379 	if (IS_ERR(dentry)) {
1380 		err = PTR_ERR(dentry);
1381 		goto out;
1382 	}
1383 
1384 	/*
1385 	 * All right, let's create it.
1386 	 */
1387 	idmap = mnt_idmap(parent.mnt);
1388 	err = security_path_mknod(&parent, dentry, mode, 0);
1389 	if (!err)
1390 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1391 	if (err)
1392 		goto out_path;
1393 	err = mutex_lock_interruptible(&u->bindlock);
1394 	if (err)
1395 		goto out_unlink;
1396 	if (u->addr)
1397 		goto out_unlock;
1398 
1399 	old_hash = sk->sk_hash;
1400 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1401 	unix_table_double_lock(net, old_hash, new_hash);
1402 	u->path.mnt = mntget(parent.mnt);
1403 	u->path.dentry = dget(dentry);
1404 	__unix_set_addr_hash(net, sk, addr, new_hash);
1405 	unix_table_double_unlock(net, old_hash, new_hash);
1406 	unix_insert_bsd_socket(sk);
1407 	mutex_unlock(&u->bindlock);
1408 	end_creating_path(&parent, dentry);
1409 	return 0;
1410 
1411 out_unlock:
1412 	mutex_unlock(&u->bindlock);
1413 	err = -EINVAL;
1414 out_unlink:
1415 	/* failed after successful mknod?  unlink what we'd created... */
1416 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1417 out_path:
1418 	end_creating_path(&parent, dentry);
1419 out:
1420 	unix_release_addr(addr);
1421 	return err == -EEXIST ? -EADDRINUSE : err;
1422 }
1423 
1424 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1425 			      int addr_len)
1426 {
1427 	struct unix_sock *u = unix_sk(sk);
1428 	unsigned int new_hash, old_hash;
1429 	struct net *net = sock_net(sk);
1430 	struct unix_address *addr;
1431 	int err;
1432 
1433 	addr = unix_create_addr(sunaddr, addr_len);
1434 	if (!addr)
1435 		return -ENOMEM;
1436 
1437 	err = mutex_lock_interruptible(&u->bindlock);
1438 	if (err)
1439 		goto out;
1440 
1441 	if (u->addr) {
1442 		err = -EINVAL;
1443 		goto out_mutex;
1444 	}
1445 
1446 	old_hash = sk->sk_hash;
1447 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1448 	unix_table_double_lock(net, old_hash, new_hash);
1449 
1450 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1451 		goto out_spin;
1452 
1453 	__unix_set_addr_hash(net, sk, addr, new_hash);
1454 	unix_table_double_unlock(net, old_hash, new_hash);
1455 	mutex_unlock(&u->bindlock);
1456 	return 0;
1457 
1458 out_spin:
1459 	unix_table_double_unlock(net, old_hash, new_hash);
1460 	err = -EADDRINUSE;
1461 out_mutex:
1462 	mutex_unlock(&u->bindlock);
1463 out:
1464 	unix_release_addr(addr);
1465 	return err;
1466 }
1467 
1468 static int unix_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
1469 {
1470 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1471 	struct sock *sk = sock->sk;
1472 	int err;
1473 
1474 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1475 	    sunaddr->sun_family == AF_UNIX)
1476 		return unix_autobind(sk);
1477 
1478 	err = unix_validate_addr(sunaddr, addr_len);
1479 	if (err)
1480 		return err;
1481 
1482 	if (sunaddr->sun_path[0])
1483 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1484 	else
1485 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1486 
1487 	return err;
1488 }
1489 
1490 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1491 {
1492 	if (unlikely(sk1 == sk2) || !sk2) {
1493 		unix_state_lock(sk1);
1494 		return;
1495 	}
1496 
1497 	if (sk1 > sk2)
1498 		swap(sk1, sk2);
1499 
1500 	unix_state_lock(sk1);
1501 	unix_state_lock(sk2);
1502 }
1503 
1504 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1505 {
1506 	if (unlikely(sk1 == sk2) || !sk2) {
1507 		unix_state_unlock(sk1);
1508 		return;
1509 	}
1510 	unix_state_unlock(sk1);
1511 	unix_state_unlock(sk2);
1512 }
1513 
1514 static int unix_dgram_connect(struct socket *sock, struct sockaddr_unsized *addr,
1515 			      int alen, int flags)
1516 {
1517 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1518 	struct sock *sk = sock->sk;
1519 	struct sock *other;
1520 	int err;
1521 
1522 	err = -EINVAL;
1523 	if (alen < offsetofend(struct sockaddr, sa_family))
1524 		goto out;
1525 
1526 	if (addr->sa_family != AF_UNSPEC) {
1527 		err = unix_validate_addr(sunaddr, alen);
1528 		if (err)
1529 			goto out;
1530 
1531 		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1532 		if (err)
1533 			goto out;
1534 
1535 		if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
1536 			err = unix_autobind(sk);
1537 			if (err)
1538 				goto out;
1539 		}
1540 
1541 restart:
1542 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
1543 		if (IS_ERR(other)) {
1544 			err = PTR_ERR(other);
1545 			goto out;
1546 		}
1547 
1548 		unix_state_double_lock(sk, other);
1549 
1550 		/* Apparently VFS overslept socket death. Retry. */
1551 		if (sock_flag(other, SOCK_DEAD)) {
1552 			unix_state_double_unlock(sk, other);
1553 			sock_put(other);
1554 			goto restart;
1555 		}
1556 
1557 		err = -EPERM;
1558 		if (!unix_may_send(sk, other))
1559 			goto out_unlock;
1560 
1561 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1562 		if (err)
1563 			goto out_unlock;
1564 
1565 		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1566 		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1567 	} else {
1568 		/*
1569 		 *	1003.1g breaking connected state with AF_UNSPEC
1570 		 */
1571 		other = NULL;
1572 		unix_state_double_lock(sk, other);
1573 	}
1574 
1575 	/*
1576 	 * If it was connected, reconnect.
1577 	 */
1578 	if (unix_peer(sk)) {
1579 		struct sock *old_peer = unix_peer(sk);
1580 
1581 		unix_peer(sk) = other;
1582 		if (!other)
1583 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1584 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1585 
1586 		unix_state_double_unlock(sk, other);
1587 
1588 		if (other != old_peer) {
1589 			unix_dgram_disconnected(sk, old_peer);
1590 
1591 			unix_state_lock(old_peer);
1592 			if (!unix_peer(old_peer))
1593 				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1594 			unix_state_unlock(old_peer);
1595 		}
1596 
1597 		sock_put(old_peer);
1598 	} else {
1599 		unix_peer(sk) = other;
1600 		unix_state_double_unlock(sk, other);
1601 	}
1602 
1603 	return 0;
1604 
1605 out_unlock:
1606 	unix_state_double_unlock(sk, other);
1607 	sock_put(other);
1608 out:
1609 	return err;
1610 }
1611 
1612 static long unix_wait_for_peer(struct sock *other, long timeo)
1613 {
1614 	struct unix_sock *u = unix_sk(other);
1615 	int sched;
1616 	DEFINE_WAIT(wait);
1617 
1618 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1619 
1620 	sched = !sock_flag(other, SOCK_DEAD) &&
1621 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1622 		unix_recvq_full_lockless(other);
1623 
1624 	unix_state_unlock(other);
1625 
1626 	if (sched)
1627 		timeo = schedule_timeout(timeo);
1628 
1629 	finish_wait(&u->peer_wait, &wait);
1630 	return timeo;
1631 }
1632 
1633 static int unix_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
1634 			       int addr_len, int flags)
1635 {
1636 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1637 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1638 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1639 	struct unix_peercred peercred = {};
1640 	struct net *net = sock_net(sk);
1641 	struct sk_buff *skb = NULL;
1642 	unsigned char state;
1643 	long timeo;
1644 	int err;
1645 
1646 	err = unix_validate_addr(sunaddr, addr_len);
1647 	if (err)
1648 		goto out;
1649 
1650 	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1651 	if (err)
1652 		goto out;
1653 
1654 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
1655 		err = unix_autobind(sk);
1656 		if (err)
1657 			goto out;
1658 	}
1659 
1660 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1661 
1662 	/* First of all allocate resources.
1663 	 * If we will make it after state is locked,
1664 	 * we will have to recheck all again in any case.
1665 	 */
1666 
1667 	/* create new sock for complete connection */
1668 	newsk = unix_create1(net, NULL, 0, sock->type);
1669 	if (IS_ERR(newsk)) {
1670 		err = PTR_ERR(newsk);
1671 		goto out;
1672 	}
1673 
1674 	err = prepare_peercred(&peercred);
1675 	if (err)
1676 		goto out;
1677 
1678 	/* Allocate skb for sending to listening sock */
1679 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1680 	if (!skb) {
1681 		err = -ENOMEM;
1682 		goto out_free_sk;
1683 	}
1684 
1685 restart:
1686 	/*  Find listening sock. */
1687 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
1688 	if (IS_ERR(other)) {
1689 		err = PTR_ERR(other);
1690 		goto out_free_skb;
1691 	}
1692 
1693 	unix_state_lock(other);
1694 
1695 	/* Apparently VFS overslept socket death. Retry. */
1696 	if (sock_flag(other, SOCK_DEAD)) {
1697 		unix_state_unlock(other);
1698 		sock_put(other);
1699 		goto restart;
1700 	}
1701 
1702 	if (other->sk_state != TCP_LISTEN ||
1703 	    other->sk_shutdown & RCV_SHUTDOWN) {
1704 		err = -ECONNREFUSED;
1705 		goto out_unlock;
1706 	}
1707 
1708 	if (unix_recvq_full_lockless(other)) {
1709 		if (!timeo) {
1710 			err = -EAGAIN;
1711 			goto out_unlock;
1712 		}
1713 
1714 		timeo = unix_wait_for_peer(other, timeo);
1715 		sock_put(other);
1716 
1717 		err = sock_intr_errno(timeo);
1718 		if (signal_pending(current))
1719 			goto out_free_skb;
1720 
1721 		goto restart;
1722 	}
1723 
1724 	/* self connect and simultaneous connect are eliminated
1725 	 * by rejecting TCP_LISTEN socket to avoid deadlock.
1726 	 */
1727 	state = READ_ONCE(sk->sk_state);
1728 	if (unlikely(state != TCP_CLOSE)) {
1729 		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1730 		goto out_unlock;
1731 	}
1732 
1733 	unix_state_lock(sk);
1734 
1735 	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1736 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1737 		unix_state_unlock(sk);
1738 		goto out_unlock;
1739 	}
1740 
1741 	err = security_unix_stream_connect(sk, other, newsk);
1742 	if (err) {
1743 		unix_state_unlock(sk);
1744 		goto out_unlock;
1745 	}
1746 
1747 	/* The way is open! Fastly set all the necessary fields... */
1748 
1749 	sock_hold(sk);
1750 	unix_peer(newsk) = sk;
1751 	newsk->sk_state = TCP_ESTABLISHED;
1752 	newsk->sk_type = sk->sk_type;
1753 	newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
1754 	init_peercred(newsk, &peercred);
1755 
1756 	newu = unix_sk(newsk);
1757 	newu->listener = other;
1758 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1759 	otheru = unix_sk(other);
1760 
1761 	/* copy address information from listening to new sock
1762 	 *
1763 	 * The contents of *(otheru->addr) and otheru->path
1764 	 * are seen fully set up here, since we have found
1765 	 * otheru in hash under its lock.  Insertion into the
1766 	 * hash chain we'd found it in had been done in an
1767 	 * earlier critical area protected by the chain's lock,
1768 	 * the same one where we'd set *(otheru->addr) contents,
1769 	 * as well as otheru->path and otheru->addr itself.
1770 	 *
1771 	 * Using smp_store_release() here to set newu->addr
1772 	 * is enough to make those stores, as well as stores
1773 	 * to newu->path visible to anyone who gets newu->addr
1774 	 * by smp_load_acquire().  IOW, the same warranties
1775 	 * as for unix_sock instances bound in unix_bind() or
1776 	 * in unix_autobind().
1777 	 */
1778 	if (otheru->path.dentry) {
1779 		path_get(&otheru->path);
1780 		newu->path = otheru->path;
1781 	}
1782 	refcount_inc(&otheru->addr->refcnt);
1783 	smp_store_release(&newu->addr, otheru->addr);
1784 
1785 	/* Set credentials */
1786 	copy_peercred(sk, other);
1787 
1788 	sock->state	= SS_CONNECTED;
1789 	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1790 	sock_hold(newsk);
1791 
1792 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1793 	unix_peer(sk)	= newsk;
1794 
1795 	unix_state_unlock(sk);
1796 
1797 	/* take ten and send info to listening sock */
1798 	spin_lock(&other->sk_receive_queue.lock);
1799 	__skb_queue_tail(&other->sk_receive_queue, skb);
1800 	spin_unlock(&other->sk_receive_queue.lock);
1801 	unix_state_unlock(other);
1802 	other->sk_data_ready(other);
1803 	sock_put(other);
1804 	return 0;
1805 
1806 out_unlock:
1807 	unix_state_unlock(other);
1808 	sock_put(other);
1809 out_free_skb:
1810 	consume_skb(skb);
1811 out_free_sk:
1812 	unix_release_sock(newsk, 0);
1813 out:
1814 	drop_peercred(&peercred);
1815 	return err;
1816 }
1817 
1818 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1819 {
1820 	struct unix_peercred ska_peercred = {}, skb_peercred = {};
1821 	struct sock *ska = socka->sk, *skb = sockb->sk;
1822 	int err;
1823 
1824 	err = prepare_peercred(&ska_peercred);
1825 	if (err)
1826 		return err;
1827 
1828 	err = prepare_peercred(&skb_peercred);
1829 	if (err) {
1830 		drop_peercred(&ska_peercred);
1831 		return err;
1832 	}
1833 
1834 	/* Join our sockets back to back */
1835 	sock_hold(ska);
1836 	sock_hold(skb);
1837 	unix_peer(ska) = skb;
1838 	unix_peer(skb) = ska;
1839 	init_peercred(ska, &ska_peercred);
1840 	init_peercred(skb, &skb_peercred);
1841 
1842 	ska->sk_state = TCP_ESTABLISHED;
1843 	skb->sk_state = TCP_ESTABLISHED;
1844 	socka->state  = SS_CONNECTED;
1845 	sockb->state  = SS_CONNECTED;
1846 	return 0;
1847 }
1848 
1849 static int unix_accept(struct socket *sock, struct socket *newsock,
1850 		       struct proto_accept_arg *arg)
1851 {
1852 	struct sock *sk = sock->sk;
1853 	struct sk_buff *skb;
1854 	struct sock *tsk;
1855 
1856 	arg->err = -EOPNOTSUPP;
1857 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1858 		goto out;
1859 
1860 	arg->err = -EINVAL;
1861 	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1862 		goto out;
1863 
1864 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1865 	 * so that no locks are necessary.
1866 	 */
1867 
1868 	skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1869 				&arg->err);
1870 	if (!skb) {
1871 		/* This means receive shutdown. */
1872 		if (arg->err == 0)
1873 			arg->err = -EINVAL;
1874 		goto out;
1875 	}
1876 
1877 	tsk = skb->sk;
1878 	skb_free_datagram(sk, skb);
1879 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1880 
1881 	if (tsk->sk_type == SOCK_STREAM)
1882 		set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
1883 
1884 	/* attach accepted sock to socket */
1885 	unix_state_lock(tsk);
1886 	unix_update_edges(unix_sk(tsk));
1887 	newsock->state = SS_CONNECTED;
1888 	sock_graft(tsk, newsock);
1889 	unix_state_unlock(tsk);
1890 	return 0;
1891 
1892 out:
1893 	return arg->err;
1894 }
1895 
1896 
1897 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1898 {
1899 	struct sock *sk = sock->sk;
1900 	struct unix_address *addr;
1901 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1902 	int err = 0;
1903 
1904 	if (peer) {
1905 		sk = unix_peer_get(sk);
1906 
1907 		err = -ENOTCONN;
1908 		if (!sk)
1909 			goto out;
1910 		err = 0;
1911 	} else {
1912 		sock_hold(sk);
1913 	}
1914 
1915 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1916 	if (!addr) {
1917 		sunaddr->sun_family = AF_UNIX;
1918 		sunaddr->sun_path[0] = 0;
1919 		err = offsetof(struct sockaddr_un, sun_path);
1920 	} else {
1921 		err = addr->len;
1922 		memcpy(sunaddr, addr->name, addr->len);
1923 
1924 		if (peer)
1925 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1926 					       CGROUP_UNIX_GETPEERNAME);
1927 		else
1928 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1929 					       CGROUP_UNIX_GETSOCKNAME);
1930 	}
1931 	sock_put(sk);
1932 out:
1933 	return err;
1934 }
1935 
1936 /* The "user->unix_inflight" variable is protected by the garbage
1937  * collection lock, and we just read it locklessly here. If you go
1938  * over the limit, there might be a tiny race in actually noticing
1939  * it across threads. Tough.
1940  */
1941 static inline bool too_many_unix_fds(struct task_struct *p)
1942 {
1943 	struct user_struct *user = current_user();
1944 
1945 	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1946 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1947 	return false;
1948 }
1949 
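/*
 * Move ownership of the passed file descriptors from the scm cookie to
 * the skb so they travel with the message.  unix_prepare_fpl() sets up
 * the bookkeeping the fd-passing garbage collector relies on.
 */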
1950 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1951 {
1952 	if (too_many_unix_fds(current))
1953 		return -ETOOMANYREFS;
1954 
1955 	UNIXCB(skb).fp = scm->fp;
1956 	scm->fp = NULL;
1957 
1958 	if (unix_prepare_fpl(UNIXCB(skb).fp))
1959 		return -ENOMEM;
1960 
1961 	return 0;
1962 }
1963 
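/* Reverse of unix_attach_fds(): hand the fds back to the scm cookie. */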
1964 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1965 {
1966 	scm->fp = UNIXCB(skb).fp;
1967 	UNIXCB(skb).fp = NULL;
1968 
1969 	unix_destroy_fpl(scm->fp);
1970 }
1971 
1972 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1973 {
1974 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1975 }
1976 
1977 static void unix_destruct_scm(struct sk_buff *skb)
1978 {
1979 	struct scm_cookie scm;
1980 
1981 	memset(&scm, 0, sizeof(scm));
1982 	scm.pid = UNIXCB(skb).pid;
1983 	if (UNIXCB(skb).fp)
1984 		unix_detach_fds(&scm, skb);
1985 
1986 	/* Alas, it calls into the VFS. */
1987 	/* So fscking what? fput() has been SMP-safe since last summer. */
1988 	scm_destroy(&scm);
1989 	sock_wfree(skb);
1990 }
1991 
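/*
 * Stash the sender's pid, credentials, security label and (optionally)
 * the passed fds in the skb control block so they can be rebuilt into a
 * scm_cookie on the receive side.
 */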
1992 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1993 {
1994 	int err = 0;
1995 
1996 	UNIXCB(skb).pid = get_pid(scm->pid);
1997 	UNIXCB(skb).uid = scm->creds.uid;
1998 	UNIXCB(skb).gid = scm->creds.gid;
1999 	UNIXCB(skb).fp = NULL;
2000 	unix_get_secdata(scm, skb);
2001 	if (scm->fp && send_fds)
2002 		err = unix_attach_fds(scm, skb);
2003 
2004 	skb->destructor = unix_destruct_scm;
2005 	return err;
2006 }
2007 
2008 static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
2009 {
2010 	scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2011 	unix_set_secdata(scm, skb);
2012 }
2013 
2014 /**
2015  * unix_maybe_add_creds() - Adds current task uid/gid and struct pid to skb if needed.
2016  * @skb: skb to attach creds to.
2017  * @sk: Sender sock.
2018  * @other: Receiver sock.
2019  *
2020  * Some apps rely on write() giving SCM_CREDENTIALS.
2021  * We include credentials if the source or destination socket
2022  * asserted SOCK_PASSCRED.
2023  *
2024  * Context: May sleep.
2025  * Return: On success zero, on error a negative error code is returned.
2026  */
2027 static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
2028 				const struct sock *other)
2029 {
2030 	if (UNIXCB(skb).pid)
2031 		return 0;
2032 
2033 	if (unix_may_passcred(sk) || unix_may_passcred(other) ||
2034 	    !other->sk_socket) {
2035 		struct pid *pid;
2036 		int err;
2037 
2038 		pid = task_tgid(current);
2039 		err = pidfs_register_pid(pid);
2040 		if (unlikely(err))
2041 			return err;
2042 
2043 		UNIXCB(skb).pid = get_pid(pid);
2044 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
2045 	}
2046 
2047 	return 0;
2048 }
2049 
2050 static bool unix_skb_scm_eq(struct sk_buff *skb,
2051 			    struct scm_cookie *scm)
2052 {
2053 	return UNIXCB(skb).pid == scm->pid &&
2054 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
2055 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
2056 	       unix_secdata_eq(scm, skb);
2057 }
2058 
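/*
 * Track fds queued on @sk: bump the per-socket in-flight fd counter and
 * register the skb's fd list with the garbage collector's edge tracking.
 */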
2059 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
2060 {
2061 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2062 	struct unix_sock *u = unix_sk(sk);
2063 
2064 	if (unlikely(fp && fp->count)) {
2065 		atomic_add(fp->count, &u->scm_stat.nr_fds);
2066 		unix_add_edges(fp, u);
2067 	}
2068 }
2069 
2070 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
2071 {
2072 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2073 	struct unix_sock *u = unix_sk(sk);
2074 
2075 	if (unlikely(fp && fp->count)) {
2076 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
2077 		unix_del_edges(fp);
2078 	}
2079 }
2080 
2081 /*
2082  *	Send AF_UNIX data.
2083  */
2084 
2085 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
2086 			      size_t len)
2087 {
2088 	struct sock *sk = sock->sk, *other = NULL;
2089 	struct unix_sock *u = unix_sk(sk);
2090 	struct scm_cookie scm;
2091 	struct sk_buff *skb;
2092 	int data_len = 0;
2093 	int sk_locked;
2094 	long timeo;
2095 	int err;
2096 
2097 	err = scm_send(sock, msg, &scm, false);
2098 	if (err < 0)
2099 		return err;
2100 
2101 	if (msg->msg_flags & MSG_OOB) {
2102 		err = -EOPNOTSUPP;
2103 		goto out;
2104 	}
2105 
2106 	if (msg->msg_namelen) {
2107 		err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
2108 		if (err)
2109 			goto out;
2110 
2111 		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
2112 							    msg->msg_name,
2113 							    &msg->msg_namelen,
2114 							    NULL);
2115 		if (err)
2116 			goto out;
2117 	}
2118 
2119 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
2120 		err = unix_autobind(sk);
2121 		if (err)
2122 			goto out;
2123 	}
2124 
2125 	if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
2126 		err = -EMSGSIZE;
2127 		goto out;
2128 	}
2129 
2130 	if (len > SKB_MAX_ALLOC) {
2131 		data_len = min_t(size_t,
2132 				 len - SKB_MAX_ALLOC,
2133 				 MAX_SKB_FRAGS * PAGE_SIZE);
2134 		data_len = PAGE_ALIGN(data_len);
2135 
2136 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
2137 	}
2138 
2139 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
2140 				   msg->msg_flags & MSG_DONTWAIT, &err,
2141 				   PAGE_ALLOC_COSTLY_ORDER);
2142 	if (!skb)
2143 		goto out;
2144 
2145 	err = unix_scm_to_skb(&scm, skb, true);
2146 	if (err < 0)
2147 		goto out_free;
2148 
2149 	skb_put(skb, len - data_len);
2150 	skb->data_len = data_len;
2151 	skb->len = len;
2152 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
2153 	if (err)
2154 		goto out_free;
2155 
2156 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2157 
2158 	if (msg->msg_namelen) {
2159 lookup:
2160 		other = unix_find_other(sock_net(sk), msg->msg_name,
2161 					msg->msg_namelen, sk->sk_type, 0);
2162 		if (IS_ERR(other)) {
2163 			err = PTR_ERR(other);
2164 			goto out_free;
2165 		}
2166 	} else {
2167 		other = unix_peer_get(sk);
2168 		if (!other) {
2169 			err = -ENOTCONN;
2170 			goto out_free;
2171 		}
2172 	}
2173 
2174 	if (sk_filter(other, skb) < 0) {
2175 		/* Toss the packet but do not return any error to the sender */
2176 		err = len;
2177 		goto out_sock_put;
2178 	}
2179 
2180 	err = unix_maybe_add_creds(skb, sk, other);
2181 	if (err)
2182 		goto out_sock_put;
2183 
2184 restart:
2185 	sk_locked = 0;
2186 	unix_state_lock(other);
2187 restart_locked:
2188 
2189 	if (!unix_may_send(sk, other)) {
2190 		err = -EPERM;
2191 		goto out_unlock;
2192 	}
2193 
2194 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
2195 		/* Check with 1003.1g - what should a datagram error return? */
2196 
2197 		unix_state_unlock(other);
2198 
2199 		if (sk->sk_type == SOCK_SEQPACKET) {
2200 			/* We are here only when racing with unix_release_sock(),
2201 			 * which is clearing @other. Never change the state to
2202 			 * TCP_CLOSE, unlike what SOCK_DGRAM wants.
2203 			 */
2204 			err = -EPIPE;
2205 			goto out_sock_put;
2206 		}
2207 
2208 		if (!sk_locked)
2209 			unix_state_lock(sk);
2210 
2211 		if (unix_peer(sk) == other) {
2212 			unix_peer(sk) = NULL;
2213 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2214 
2215 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2216 			unix_state_unlock(sk);
2217 
2218 			unix_dgram_disconnected(sk, other);
2219 			sock_put(other);
2220 			err = -ECONNREFUSED;
2221 			goto out_sock_put;
2222 		}
2223 
2224 		unix_state_unlock(sk);
2225 
2226 		if (!msg->msg_namelen) {
2227 			err = -ECONNRESET;
2228 			goto out_sock_put;
2229 		}
2230 
2231 		sock_put(other);
2232 		goto lookup;
2233 	}
2234 
2235 	if (other->sk_shutdown & RCV_SHUTDOWN) {
2236 		err = -EPIPE;
2237 		goto out_unlock;
2238 	}
2239 
2240 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2241 		err = -EPERM;
2242 		goto out_unlock;
2243 	}
2244 
2245 	if (sk->sk_type != SOCK_SEQPACKET) {
2246 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2247 		if (err)
2248 			goto out_unlock;
2249 	}
2250 
2251 	/* other == sk && unix_peer(other) != sk can happen if
2252 	 * - unix_peer(sk) == NULL and the destination address is bound to sk
2253 	 * - unix_peer(sk) == sk at lookup time but it disconnected before the lock
2254 	 */
2255 	if (other != sk &&
2256 	    unlikely(unix_peer(other) != sk &&
2257 	    unix_recvq_full_lockless(other))) {
2258 		if (timeo) {
2259 			timeo = unix_wait_for_peer(other, timeo);
2260 
2261 			err = sock_intr_errno(timeo);
2262 			if (signal_pending(current))
2263 				goto out_sock_put;
2264 
2265 			goto restart;
2266 		}
2267 
2268 		if (!sk_locked) {
2269 			unix_state_unlock(other);
2270 			unix_state_double_lock(sk, other);
2271 		}
2272 
2273 		if (unix_peer(sk) != other ||
2274 		    unix_dgram_peer_wake_me(sk, other)) {
2275 			err = -EAGAIN;
2276 			sk_locked = 1;
2277 			goto out_unlock;
2278 		}
2279 
2280 		if (!sk_locked) {
2281 			sk_locked = 1;
2282 			goto restart_locked;
2283 		}
2284 	}
2285 
2286 	if (unlikely(sk_locked))
2287 		unix_state_unlock(sk);
2288 
2289 	if (sock_flag(other, SOCK_RCVTSTAMP))
2290 		__net_timestamp(skb);
2291 
2292 	scm_stat_add(other, skb);
2293 	skb_queue_tail(&other->sk_receive_queue, skb);
2294 	unix_state_unlock(other);
2295 	other->sk_data_ready(other);
2296 	sock_put(other);
2297 	scm_destroy(&scm);
2298 	return len;
2299 
2300 out_unlock:
2301 	if (sk_locked)
2302 		unix_state_unlock(sk);
2303 	unix_state_unlock(other);
2304 out_sock_put:
2305 	sock_put(other);
2306 out_free:
2307 	consume_skb(skb);
2308 out:
2309 	scm_destroy(&scm);
2310 	return err;
2311 }
2312 
2313 /* We use paged skbs for stream sockets, and limit per-skb occupancy to
2314  * 32768 bytes, with a minimum of a full page.
2315  */
2316 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2317 
2318 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
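/*
 * Queue the single out-of-band byte for MSG_OOB on a stream socket.
 * The byte is appended to the receive queue like ordinary data but is
 * also remembered in ->oob_skb so the receiver can locate the mark,
 * and SIGURG is sent to the receiving socket's owner.
 */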
2319 static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
2320 		     struct scm_cookie *scm, bool fds_sent)
2321 {
2322 	struct unix_sock *ousk = unix_sk(other);
2323 	struct sk_buff *skb;
2324 	int err;
2325 
2326 	skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2327 
2328 	if (!skb)
2329 		return err;
2330 
2331 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2332 	if (err < 0)
2333 		goto out;
2334 
2335 	err = unix_maybe_add_creds(skb, sk, other);
2336 	if (err)
2337 		goto out;
2338 
2339 	skb_put(skb, 1);
2340 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2341 
2342 	if (err)
2343 		goto out;
2344 
2345 	unix_state_lock(other);
2346 
2347 	if (sock_flag(other, SOCK_DEAD) ||
2348 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2349 		err = -EPIPE;
2350 		goto out_unlock;
2351 	}
2352 
2353 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2354 		err = -EPERM;
2355 		goto out_unlock;
2356 	}
2357 
2358 	scm_stat_add(other, skb);
2359 
2360 	spin_lock(&other->sk_receive_queue.lock);
2361 	WRITE_ONCE(ousk->oob_skb, skb);
2362 	WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
2363 	__skb_queue_tail(&other->sk_receive_queue, skb);
2364 	spin_unlock(&other->sk_receive_queue.lock);
2365 
2366 	sk_send_sigurg(other);
2367 	unix_state_unlock(other);
2368 	other->sk_data_ready(other);
2369 
2370 	return 0;
2371 out_unlock:
2372 	unix_state_unlock(other);
2373 out:
2374 	consume_skb(skb);
2375 	return err;
2376 }
2377 #endif
2378 
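/*
 * Stream sendmsg: chop the payload into paged skbs and append them to
 * the peer's receive queue.  Any passed fds travel only with the first
 * skb; with MSG_OOB the final byte is queued separately via queue_oob().
 */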
2379 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2380 			       size_t len)
2381 {
2382 	struct sock *sk = sock->sk;
2383 	struct sk_buff *skb = NULL;
2384 	struct sock *other = NULL;
2385 	struct unix_sock *otheru;
2386 	struct scm_cookie scm;
2387 	bool fds_sent = false;
2388 	int err, sent = 0;
2389 
2390 	err = scm_send(sock, msg, &scm, false);
2391 	if (err < 0)
2392 		return err;
2393 
2394 	if (msg->msg_flags & MSG_OOB) {
2395 		err = -EOPNOTSUPP;
2396 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2397 		if (len)
2398 			len--;
2399 		else
2400 #endif
2401 			goto out_err;
2402 	}
2403 
2404 	if (msg->msg_namelen) {
2405 		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2406 		goto out_err;
2407 	}
2408 
2409 	other = unix_peer(sk);
2410 	if (!other) {
2411 		err = -ENOTCONN;
2412 		goto out_err;
2413 	}
2414 
2415 	otheru = unix_sk(other);
2416 
2417 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2418 		goto out_pipe;
2419 
2420 	while (sent < len) {
2421 		int size = len - sent;
2422 		int data_len;
2423 
2424 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2425 			skb = sock_alloc_send_pskb(sk, 0, 0,
2426 						   msg->msg_flags & MSG_DONTWAIT,
2427 						   &err, 0);
2428 		} else {
2429 			/* Keep two messages in the pipe so it schedules better */
2430 			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2431 
2432 			/* allow fallback to order-0 allocations */
2433 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2434 
2435 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2436 
2437 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2438 
2439 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2440 						   msg->msg_flags & MSG_DONTWAIT, &err,
2441 						   get_order(UNIX_SKB_FRAGS_SZ));
2442 		}
2443 		if (!skb)
2444 			goto out_err;
2445 
2446 		/* Only send the fds in the first buffer */
2447 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2448 		if (err < 0)
2449 			goto out_free;
2450 
2451 		fds_sent = true;
2452 
2453 		err = unix_maybe_add_creds(skb, sk, other);
2454 		if (err)
2455 			goto out_free;
2456 
2457 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2458 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2459 			err = skb_splice_from_iter(skb, &msg->msg_iter, size);
2460 			if (err < 0)
2461 				goto out_free;
2462 
2463 			size = err;
2464 			refcount_add(size, &sk->sk_wmem_alloc);
2465 		} else {
2466 			skb_put(skb, size - data_len);
2467 			skb->data_len = data_len;
2468 			skb->len = size;
2469 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2470 			if (err)
2471 				goto out_free;
2472 		}
2473 
2474 		unix_state_lock(other);
2475 
2476 		if (sock_flag(other, SOCK_DEAD) ||
2477 		    (other->sk_shutdown & RCV_SHUTDOWN))
2478 			goto out_pipe_unlock;
2479 
2480 		if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2481 			unix_state_unlock(other);
2482 			err = -EPERM;
2483 			goto out_free;
2484 		}
2485 
2486 		scm_stat_add(other, skb);
2487 
2488 		spin_lock(&other->sk_receive_queue.lock);
2489 		WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
2490 		__skb_queue_tail(&other->sk_receive_queue, skb);
2491 		spin_unlock(&other->sk_receive_queue.lock);
2492 
2493 		unix_state_unlock(other);
2494 		other->sk_data_ready(other);
2495 		sent += size;
2496 	}
2497 
2498 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2499 	if (msg->msg_flags & MSG_OOB) {
2500 		err = queue_oob(sk, msg, other, &scm, fds_sent);
2501 		if (err)
2502 			goto out_err;
2503 		sent++;
2504 	}
2505 #endif
2506 
2507 	scm_destroy(&scm);
2508 
2509 	return sent;
2510 
2511 out_pipe_unlock:
2512 	unix_state_unlock(other);
2513 out_pipe:
2514 	if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
2515 		send_sig(SIGPIPE, current, 0);
2516 	err = -EPIPE;
2517 out_free:
2518 	consume_skb(skb);
2519 out_err:
2520 	scm_destroy(&scm);
2521 	return sent ? : err;
2522 }
2523 
2524 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2525 				  size_t len)
2526 {
2527 	int err;
2528 	struct sock *sk = sock->sk;
2529 
2530 	err = sock_error(sk);
2531 	if (err)
2532 		return err;
2533 
2534 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2535 		return -ENOTCONN;
2536 
2537 	if (msg->msg_namelen)
2538 		msg->msg_namelen = 0;
2539 
2540 	return unix_dgram_sendmsg(sock, msg, len);
2541 }
2542 
2543 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2544 				  size_t size, int flags)
2545 {
2546 	struct sock *sk = sock->sk;
2547 
2548 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2549 		return -ENOTCONN;
2550 
2551 	return unix_dgram_recvmsg(sock, msg, size, flags);
2552 }
2553 
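/* Copy the sender's bound address, if any, into msg_name. */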
2554 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2555 {
2556 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2557 
2558 	if (addr) {
2559 		msg->msg_namelen = addr->len;
2560 		memcpy(msg->msg_name, addr->name, addr->len);
2561 	}
2562 }
2563 
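/*
 * Receive one datagram.  MSG_PEEK leaves the skb on the queue and hands
 * out duplicated fd references; a normal read detaches the fds and
 * consumes the skb.
 */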
2564 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2565 			 int flags)
2566 {
2567 	struct scm_cookie scm;
2568 	struct socket *sock = sk->sk_socket;
2569 	struct unix_sock *u = unix_sk(sk);
2570 	struct sk_buff *skb, *last;
2571 	long timeo;
2572 	int skip;
2573 	int err;
2574 
2575 	err = -EOPNOTSUPP;
2576 	if (flags & MSG_OOB)
2577 		goto out;
2578 
2579 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2580 
2581 	do {
2582 		mutex_lock(&u->iolock);
2583 
2584 		skip = sk_peek_offset(sk, flags);
2585 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2586 					      &skip, &err, &last);
2587 		if (skb) {
2588 			if (!(flags & MSG_PEEK))
2589 				scm_stat_del(sk, skb);
2590 			break;
2591 		}
2592 
2593 		mutex_unlock(&u->iolock);
2594 
2595 		if (err != -EAGAIN)
2596 			break;
2597 	} while (timeo &&
2598 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2599 					      &err, &timeo, last));
2600 
2601 	if (!skb) { /* implies iolock unlocked */
2602 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2603 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2604 		    (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
2605 			err = 0;
2606 		goto out;
2607 	}
2608 
2609 	if (wq_has_sleeper(&u->peer_wait))
2610 		wake_up_interruptible_sync_poll(&u->peer_wait,
2611 						EPOLLOUT | EPOLLWRNORM |
2612 						EPOLLWRBAND);
2613 
2614 	if (msg->msg_name) {
2615 		unix_copy_addr(msg, skb->sk);
2616 
2617 		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2618 						      msg->msg_name,
2619 						      &msg->msg_namelen);
2620 	}
2621 
2622 	if (size > skb->len - skip)
2623 		size = skb->len - skip;
2624 	else if (size < skb->len - skip)
2625 		msg->msg_flags |= MSG_TRUNC;
2626 
2627 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2628 	if (err)
2629 		goto out_free;
2630 
2631 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2632 		__sock_recv_timestamp(msg, sk, skb);
2633 
2634 	memset(&scm, 0, sizeof(scm));
2635 
2636 	unix_skb_to_scm(skb, &scm);
2637 
2638 	if (!(flags & MSG_PEEK)) {
2639 		if (UNIXCB(skb).fp)
2640 			unix_detach_fds(&scm, skb);
2641 
2642 		sk_peek_offset_bwd(sk, skb->len);
2643 	} else {
2644 		/* It is questionable: on MSG_PEEK we could:
2645 		 * - not return fds - good, but too simple 8)
2646 		 * - return fds, and not return them on read (old strategy,
2647 		 *   apparently wrong)
2648 		 * - clone fds (chosen for now, as it is the most universal
2649 		 *   solution)
2650 		 *
2651 		 * POSIX 1003.1g does not actually define this clearly
2652 		 * at all.  POSIX 1003.1g doesn't define a lot of things
2653 		 * clearly, however!
2654 		 */
2656 
2657 		sk_peek_offset_fwd(sk, size);
2658 
2659 		if (UNIXCB(skb).fp)
2660 			unix_peek_fds(&scm, skb);
2661 	}
2662 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2663 
2664 	scm_recv_unix(sock, msg, &scm, flags);
2665 
2666 out_free:
2667 	skb_free_datagram(sk, skb);
2668 	mutex_unlock(&u->iolock);
2669 out:
2670 	return err;
2671 }
2672 
2673 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2674 			      int flags)
2675 {
2676 	struct sock *sk = sock->sk;
2677 
2678 #ifdef CONFIG_BPF_SYSCALL
2679 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2680 
2681 	if (prot != &unix_dgram_proto)
2682 		return prot->recvmsg(sk, msg, size, flags, NULL);
2683 #endif
2684 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2685 }
2686 
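/* ->read_skb(): hand one queued datagram to @recv_actor without blocking
 * (used e.g. by BPF sockmap).
 */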
2687 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2688 {
2689 	struct unix_sock *u = unix_sk(sk);
2690 	struct sk_buff *skb;
2691 	int err;
2692 
2693 	mutex_lock(&u->iolock);
2694 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2695 	mutex_unlock(&u->iolock);
2696 	if (!skb)
2697 		return err;
2698 
2699 	return recv_actor(sk, skb);
2700 }
2701 
2702 /*
2703  *	Sleep until more data has arrived. But check for races.
2704  */
2705 static long unix_stream_data_wait(struct sock *sk, long timeo,
2706 				  struct sk_buff *last, unsigned int last_len,
2707 				  bool freezable)
2708 {
2709 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2710 	struct sk_buff *tail;
2711 	DEFINE_WAIT(wait);
2712 
2713 	unix_state_lock(sk);
2714 
2715 	for (;;) {
2716 		prepare_to_wait(sk_sleep(sk), &wait, state);
2717 
2718 		tail = skb_peek_tail(&sk->sk_receive_queue);
2719 		if (tail != last ||
2720 		    (tail && tail->len != last_len) ||
2721 		    sk->sk_err ||
2722 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2723 		    signal_pending(current) ||
2724 		    !timeo)
2725 			break;
2726 
2727 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2728 		unix_state_unlock(sk);
2729 		timeo = schedule_timeout(timeo);
2730 		unix_state_lock(sk);
2731 
2732 		if (sock_flag(sk, SOCK_DEAD))
2733 			break;
2734 
2735 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2736 	}
2737 
2738 	finish_wait(sk_sleep(sk), &wait);
2739 	unix_state_unlock(sk);
2740 	return timeo;
2741 }
2742 
2743 struct unix_stream_read_state {
2744 	int (*recv_actor)(struct sk_buff *, int, int,
2745 			  struct unix_stream_read_state *);
2746 	struct socket *socket;
2747 	struct msghdr *msg;
2748 	struct pipe_inode_info *pipe;
2749 	size_t size;
2750 	int flags;
2751 	unsigned int splice_flags;
2752 };
2753 
2754 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
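/*
 * MSG_OOB receive: hand the pending out-of-band byte to the caller.
 * Fails with -EINVAL if there is no OOB byte or if SOCK_URGINLINE is
 * set (the byte is then read as part of the normal stream).
 */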
2755 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2756 {
2757 	struct sk_buff *oob_skb, *read_skb = NULL;
2758 	struct socket *sock = state->socket;
2759 	struct sock *sk = sock->sk;
2760 	struct unix_sock *u = unix_sk(sk);
2761 	int chunk = 1;
2762 
2763 	mutex_lock(&u->iolock);
2764 	unix_state_lock(sk);
2765 	spin_lock(&sk->sk_receive_queue.lock);
2766 
2767 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2768 		spin_unlock(&sk->sk_receive_queue.lock);
2769 		unix_state_unlock(sk);
2770 		mutex_unlock(&u->iolock);
2771 		return -EINVAL;
2772 	}
2773 
2774 	oob_skb = u->oob_skb;
2775 
2776 	if (!(state->flags & MSG_PEEK)) {
2777 		WRITE_ONCE(u->oob_skb, NULL);
2778 		WRITE_ONCE(u->inq_len, u->inq_len - 1);
2779 
2780 		if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
2781 		    !unix_skb_len(oob_skb->prev)) {
2782 			read_skb = oob_skb->prev;
2783 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2784 		}
2785 	}
2786 
2787 	spin_unlock(&sk->sk_receive_queue.lock);
2788 	unix_state_unlock(sk);
2789 
2790 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2791 
2792 	if (!(state->flags & MSG_PEEK))
2793 		UNIXCB(oob_skb).consumed += 1;
2794 
2795 	mutex_unlock(&u->iolock);
2796 
2797 	consume_skb(read_skb);
2798 
2799 	if (chunk < 0)
2800 		return -EFAULT;
2801 
2802 	state->msg->msg_flags |= MSG_OOB;
2803 	return 1;
2804 }
2805 
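/*
 * Decide how the pending OOB skb affects a normal stream read: a read
 * that has already copied data stops short of the mark, while a read
 * reaching the mark either returns it inline (SOCK_URGINLINE) or skips
 * the OOB skb, dropping it unless this is a MSG_PEEK.
 */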
2806 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2807 				  int flags, int copied)
2808 {
2809 	struct sk_buff *read_skb = NULL, *unread_skb = NULL;
2810 	struct unix_sock *u = unix_sk(sk);
2811 
2812 	if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
2813 		return skb;
2814 
2815 	spin_lock(&sk->sk_receive_queue.lock);
2816 
2817 	if (!unix_skb_len(skb)) {
2818 		if (copied && (!u->oob_skb || skb == u->oob_skb)) {
2819 			skb = NULL;
2820 		} else if (flags & MSG_PEEK) {
2821 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2822 		} else {
2823 			read_skb = skb;
2824 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2825 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2826 		}
2827 
2828 		if (!skb)
2829 			goto unlock;
2830 	}
2831 
2832 	if (skb != u->oob_skb)
2833 		goto unlock;
2834 
2835 	if (copied) {
2836 		skb = NULL;
2837 	} else if (!(flags & MSG_PEEK)) {
2838 		WRITE_ONCE(u->oob_skb, NULL);
2839 
2840 		if (!sock_flag(sk, SOCK_URGINLINE)) {
2841 			__skb_unlink(skb, &sk->sk_receive_queue);
2842 			unread_skb = skb;
2843 			skb = skb_peek(&sk->sk_receive_queue);
2844 		}
2845 	} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2846 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
2847 	}
2848 
2849 unlock:
2850 	spin_unlock(&sk->sk_receive_queue.lock);
2851 
2852 	consume_skb(read_skb);
2853 	kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2854 
2855 	return skb;
2856 }
2857 #endif
2858 
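/*
 * ->read_skb() for stream sockets: dequeue exactly one skb and feed it
 * to @recv_actor, silently discarding an OOB skb found at the head of
 * the queue.
 */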
2859 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2860 {
2861 	struct sk_buff_head *queue = &sk->sk_receive_queue;
2862 	struct unix_sock *u = unix_sk(sk);
2863 	struct sk_buff *skb;
2864 	int err;
2865 
2866 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2867 		return -ENOTCONN;
2868 
2869 	err = sock_error(sk);
2870 	if (err)
2871 		return err;
2872 
2873 	mutex_lock(&u->iolock);
2874 	spin_lock(&queue->lock);
2875 
2876 	skb = __skb_dequeue(queue);
2877 	if (!skb) {
2878 		spin_unlock(&queue->lock);
2879 		mutex_unlock(&u->iolock);
2880 		return -EAGAIN;
2881 	}
2882 
2883 	WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
2884 
2885 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2886 	if (skb == u->oob_skb) {
2887 		WRITE_ONCE(u->oob_skb, NULL);
2888 		spin_unlock(&queue->lock);
2889 		mutex_unlock(&u->iolock);
2890 
2891 		kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2892 		return -EAGAIN;
2893 	}
2894 #endif
2895 
2896 	spin_unlock(&queue->lock);
2897 	mutex_unlock(&u->iolock);
2898 
2899 	return recv_actor(sk, skb);
2900 }
2901 
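/*
 * Common receive path for stream and seqpacket sockets, shared by
 * recvmsg() and splice_read() via state->recv_actor.  Messages from
 * different senders are never merged into one read when credentials
 * are being passed.
 */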
2902 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2903 				    bool freezable)
2904 {
2905 	int noblock = state->flags & MSG_DONTWAIT;
2906 	struct socket *sock = state->socket;
2907 	struct msghdr *msg = state->msg;
2908 	struct sock *sk = sock->sk;
2909 	size_t size = state->size;
2910 	int flags = state->flags;
2911 	bool check_creds = false;
2912 	struct scm_cookie scm;
2913 	unsigned int last_len;
2914 	struct unix_sock *u;
2915 	int copied = 0;
2916 	int err = 0;
2917 	long timeo;
2918 	int target;
2919 	int skip;
2920 
2921 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2922 		err = -EINVAL;
2923 		goto out;
2924 	}
2925 
2926 	if (unlikely(flags & MSG_OOB)) {
2927 		err = -EOPNOTSUPP;
2928 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2929 		err = unix_stream_recv_urg(state);
2930 #endif
2931 		goto out;
2932 	}
2933 
2934 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2935 	timeo = sock_rcvtimeo(sk, noblock);
2936 
2937 	memset(&scm, 0, sizeof(scm));
2938 
2939 	u = unix_sk(sk);
2940 
2941 redo:
2942 	/* Lock the socket to prevent queue disordering
2943 	 * while we sleep copying data to the user's buffer.
2944 	 */
2945 	mutex_lock(&u->iolock);
2946 
2947 	skip = max(sk_peek_offset(sk, flags), 0);
2948 
2949 	do {
2950 		struct sk_buff *skb, *last;
2951 		int chunk;
2952 
2953 		unix_state_lock(sk);
2954 		if (sock_flag(sk, SOCK_DEAD)) {
2955 			err = -ECONNRESET;
2956 			goto unlock;
2957 		}
2958 		last = skb = skb_peek(&sk->sk_receive_queue);
2959 		last_len = last ? last->len : 0;
2960 
2961 again:
2962 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2963 		if (skb) {
2964 			skb = manage_oob(skb, sk, flags, copied);
2965 			if (!skb && copied) {
2966 				unix_state_unlock(sk);
2967 				break;
2968 			}
2969 		}
2970 #endif
2971 		if (skb == NULL) {
2972 			if (copied >= target)
2973 				goto unlock;
2974 
2975 			/*
2976 			 *	POSIX 1003.1g mandates this order.
2977 			 */
2978 
2979 			err = sock_error(sk);
2980 			if (err)
2981 				goto unlock;
2982 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2983 				goto unlock;
2984 
2985 			unix_state_unlock(sk);
2986 			if (!timeo) {
2987 				err = -EAGAIN;
2988 				break;
2989 			}
2990 
2991 			mutex_unlock(&u->iolock);
2992 
2993 			timeo = unix_stream_data_wait(sk, timeo, last,
2994 						      last_len, freezable);
2995 
2996 			if (signal_pending(current)) {
2997 				err = sock_intr_errno(timeo);
2998 				scm_destroy(&scm);
2999 				goto out;
3000 			}
3001 
3002 			goto redo;
3003 unlock:
3004 			unix_state_unlock(sk);
3005 			break;
3006 		}
3007 
3008 		while (skip >= unix_skb_len(skb)) {
3009 			skip -= unix_skb_len(skb);
3010 			last = skb;
3011 			last_len = skb->len;
3012 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3013 			if (!skb)
3014 				goto again;
3015 		}
3016 
3017 		unix_state_unlock(sk);
3018 
3019 		if (check_creds) {
3020 			/* Never glue messages from different writers */
3021 			if (!unix_skb_scm_eq(skb, &scm))
3022 				break;
3023 		} else if (unix_may_passcred(sk)) {
3024 			/* Copy credentials */
3025 			unix_skb_to_scm(skb, &scm);
3026 			check_creds = true;
3027 		}
3028 
3029 		/* Copy address just once */
3030 		if (msg && msg->msg_name) {
3031 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
3032 
3033 			unix_copy_addr(msg, skb->sk);
3034 			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
3035 							      &msg->msg_namelen);
3036 
3037 			sunaddr = NULL;
3038 		}
3039 
3040 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
3041 		chunk = state->recv_actor(skb, skip, chunk, state);
3042 		if (chunk < 0) {
3043 			if (copied == 0)
3044 				copied = -EFAULT;
3045 			break;
3046 		}
3047 		copied += chunk;
3048 		size -= chunk;
3049 
3050 		/* Mark read part of skb as used */
3051 		if (!(flags & MSG_PEEK)) {
3052 			UNIXCB(skb).consumed += chunk;
3053 
3054 			sk_peek_offset_bwd(sk, chunk);
3055 
3056 			if (UNIXCB(skb).fp) {
3057 				scm_stat_del(sk, skb);
3058 				unix_detach_fds(&scm, skb);
3059 			}
3060 
3061 			if (unix_skb_len(skb))
3062 				break;
3063 
3064 			spin_lock(&sk->sk_receive_queue.lock);
3065 			WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
3066 			__skb_unlink(skb, &sk->sk_receive_queue);
3067 			spin_unlock(&sk->sk_receive_queue.lock);
3068 
3069 			consume_skb(skb);
3070 
3071 			if (scm.fp)
3072 				break;
3073 		} else {
3074 			/* It is questionable, see note in unix_dgram_recvmsg.
3075 			 */
3076 			if (UNIXCB(skb).fp)
3077 				unix_peek_fds(&scm, skb);
3078 
3079 			sk_peek_offset_fwd(sk, chunk);
3080 
3081 			if (UNIXCB(skb).fp)
3082 				break;
3083 
3084 			skip = 0;
3085 			last = skb;
3086 			last_len = skb->len;
3087 			unix_state_lock(sk);
3088 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3089 			if (skb)
3090 				goto again;
3091 			unix_state_unlock(sk);
3092 			break;
3093 		}
3094 	} while (size);
3095 
3096 	mutex_unlock(&u->iolock);
3097 	if (msg) {
3098 		scm_recv_unix(sock, msg, &scm, flags);
3099 
3100 		if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) {
3101 			msg->msg_inq = READ_ONCE(u->inq_len);
3102 			put_cmsg(msg, SOL_SOCKET, SCM_INQ,
3103 				 sizeof(msg->msg_inq), &msg->msg_inq);
3104 		}
3105 	} else {
3106 		scm_destroy(&scm);
3107 	}
3108 out:
3109 	return copied ? : err;
3110 }
3111 
3112 static int unix_stream_read_actor(struct sk_buff *skb,
3113 				  int skip, int chunk,
3114 				  struct unix_stream_read_state *state)
3115 {
3116 	int ret;
3117 
3118 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
3119 				    state->msg, chunk);
3120 	return ret ?: chunk;
3121 }
3122 
3123 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
3124 			  size_t size, int flags)
3125 {
3126 	struct unix_stream_read_state state = {
3127 		.recv_actor = unix_stream_read_actor,
3128 		.socket = sk->sk_socket,
3129 		.msg = msg,
3130 		.size = size,
3131 		.flags = flags
3132 	};
3133 
3134 	return unix_stream_read_generic(&state, true);
3135 }
3136 
3137 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
3138 			       size_t size, int flags)
3139 {
3140 	struct unix_stream_read_state state = {
3141 		.recv_actor = unix_stream_read_actor,
3142 		.socket = sock,
3143 		.msg = msg,
3144 		.size = size,
3145 		.flags = flags
3146 	};
3147 
3148 #ifdef CONFIG_BPF_SYSCALL
3149 	struct sock *sk = sock->sk;
3150 	const struct proto *prot = READ_ONCE(sk->sk_prot);
3151 
3152 	if (prot != &unix_stream_proto)
3153 		return prot->recvmsg(sk, msg, size, flags, NULL);
3154 #endif
3155 	return unix_stream_read_generic(&state, true);
3156 }
3157 
3158 static int unix_stream_splice_actor(struct sk_buff *skb,
3159 				    int skip, int chunk,
3160 				    struct unix_stream_read_state *state)
3161 {
3162 	return skb_splice_bits(skb, state->socket->sk,
3163 			       UNIXCB(skb).consumed + skip,
3164 			       state->pipe, chunk, state->splice_flags);
3165 }
3166 
3167 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
3168 				       struct pipe_inode_info *pipe,
3169 				       size_t size, unsigned int flags)
3170 {
3171 	struct unix_stream_read_state state = {
3172 		.recv_actor = unix_stream_splice_actor,
3173 		.socket = sock,
3174 		.pipe = pipe,
3175 		.size = size,
3176 		.splice_flags = flags,
3177 	};
3178 
3179 	if (unlikely(*ppos))
3180 		return -ESPIPE;
3181 
3182 	if (sock->file->f_flags & O_NONBLOCK ||
3183 	    flags & SPLICE_F_NONBLOCK)
3184 		state.flags = MSG_DONTWAIT;
3185 
3186 	return unix_stream_read_generic(&state, false);
3187 }
3188 
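/*
 * shutdown(2): record the local shutdown bits and, for connection
 * oriented sockets, propagate the corresponding bits to the peer and
 * wake it up.
 */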
3189 static int unix_shutdown(struct socket *sock, int mode)
3190 {
3191 	struct sock *sk = sock->sk;
3192 	struct sock *other;
3193 
3194 	if (mode < SHUT_RD || mode > SHUT_RDWR)
3195 		return -EINVAL;
3196 	/* This maps:
3197 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
3198 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
3199 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3200 	 */
3201 	++mode;
3202 
3203 	unix_state_lock(sk);
3204 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
3205 	other = unix_peer(sk);
3206 	if (other)
3207 		sock_hold(other);
3208 	unix_state_unlock(sk);
3209 	sk->sk_state_change(sk);
3210 
3211 	if (other &&
3212 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3213 
3214 		int peer_mode = 0;
3215 		const struct proto *prot = READ_ONCE(other->sk_prot);
3216 
3217 		if (prot->unhash)
3218 			prot->unhash(other);
3219 		if (mode & RCV_SHUTDOWN)
3220 			peer_mode |= SEND_SHUTDOWN;
3221 		if (mode & SEND_SHUTDOWN)
3222 			peer_mode |= RCV_SHUTDOWN;
3223 		unix_state_lock(other);
3224 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
3225 		unix_state_unlock(other);
3226 		other->sk_state_change(other);
3227 		if (peer_mode == SHUTDOWN_MASK)
3228 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3229 		else if (peer_mode & RCV_SHUTDOWN)
3230 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3231 	}
3232 	if (other)
3233 		sock_put(other);
3234 
3235 	return 0;
3236 }
3237 
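/*
 * Bytes queued for reading (SIOCINQ): the cached total for stream
 * sockets, the sum of all queued messages for seqpacket, or the size
 * of the next datagram otherwise.
 */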
3238 long unix_inq_len(struct sock *sk)
3239 {
3240 	struct sk_buff *skb;
3241 	long amount = 0;
3242 
3243 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
3244 		return -EINVAL;
3245 
3246 	if (sk->sk_type == SOCK_STREAM)
3247 		return READ_ONCE(unix_sk(sk)->inq_len);
3248 
3249 	spin_lock(&sk->sk_receive_queue.lock);
3250 	if (sk->sk_type == SOCK_SEQPACKET) {
3251 		skb_queue_walk(&sk->sk_receive_queue, skb)
3252 			amount += unix_skb_len(skb);
3253 	} else {
3254 		skb = skb_peek(&sk->sk_receive_queue);
3255 		if (skb)
3256 			amount = skb->len;
3257 	}
3258 	spin_unlock(&sk->sk_receive_queue.lock);
3259 
3260 	return amount;
3261 }
3262 EXPORT_SYMBOL_GPL(unix_inq_len);
3263 
3264 long unix_outq_len(struct sock *sk)
3265 {
3266 	return sk_wmem_alloc_get(sk);
3267 }
3268 EXPORT_SYMBOL_GPL(unix_outq_len);
3269 
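/*
 * SIOCUNIXFILE: open the filesystem object this socket is bound to as
 * an O_PATH file descriptor.  Restricted to CAP_NET_ADMIN over the
 * socket's network namespace and to sockets bound to a pathname.
 */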
3270 static int unix_open_file(struct sock *sk)
3271 {
3272 	struct file *f;
3273 	int fd;
3274 
3275 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3276 		return -EPERM;
3277 
3278 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3279 		return -ENOENT;
3280 
3281 	if (!unix_sk(sk)->path.dentry)
3282 		return -ENOENT;
3283 
3284 	fd = get_unused_fd_flags(O_CLOEXEC);
3285 	if (fd < 0)
3286 		return fd;
3287 
3288 	f = dentry_open(&unix_sk(sk)->path, O_PATH, current_cred());
3289 	if (IS_ERR(f)) {
3290 		put_unused_fd(fd);
3291 		return PTR_ERR(f);
3292 	}
3293 
3294 	fd_install(fd, f);
3295 	return fd;
3296 }
3297 
3298 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3299 {
3300 	struct sock *sk = sock->sk;
3301 	long amount = 0;
3302 	int err;
3303 
3304 	switch (cmd) {
3305 	case SIOCOUTQ:
3306 		amount = unix_outq_len(sk);
3307 		err = put_user(amount, (int __user *)arg);
3308 		break;
3309 	case SIOCINQ:
3310 		amount = unix_inq_len(sk);
3311 		if (amount < 0)
3312 			err = amount;
3313 		else
3314 			err = put_user(amount, (int __user *)arg);
3315 		break;
3316 	case SIOCUNIXFILE:
3317 		err = unix_open_file(sk);
3318 		break;
3319 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3320 	case SIOCATMARK:
3321 		{
3322 			struct unix_sock *u = unix_sk(sk);
3323 			struct sk_buff *skb;
3324 			int answ = 0;
3325 
3326 			mutex_lock(&u->iolock);
3327 
3328 			skb = skb_peek(&sk->sk_receive_queue);
3329 			if (skb) {
3330 				struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
3331 				struct sk_buff *next_skb;
3332 
3333 				next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
3334 
3335 				if (skb == oob_skb ||
3336 				    (!unix_skb_len(skb) &&
3337 				     (!oob_skb || next_skb == oob_skb)))
3338 					answ = 1;
3339 			}
3340 
3341 			mutex_unlock(&u->iolock);
3342 
3343 			err = put_user(answ, (int __user *)arg);
3344 		}
3345 		break;
3346 #endif
3347 	default:
3348 		err = -ENOIOCTLCMD;
3349 		break;
3350 	}
3351 	return err;
3352 }
3353 
3354 #ifdef CONFIG_COMPAT
3355 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3356 {
3357 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3358 }
3359 #endif
3360 
3361 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3362 {
3363 	struct sock *sk = sock->sk;
3364 	unsigned char state;
3365 	__poll_t mask;
3366 	u8 shutdown;
3367 
3368 	sock_poll_wait(file, sock, wait);
3369 	mask = 0;
3370 	shutdown = READ_ONCE(sk->sk_shutdown);
3371 	state = READ_ONCE(sk->sk_state);
3372 
3373 	/* exceptional events? */
3374 	if (READ_ONCE(sk->sk_err))
3375 		mask |= EPOLLERR;
3376 	if (shutdown == SHUTDOWN_MASK)
3377 		mask |= EPOLLHUP;
3378 	if (shutdown & RCV_SHUTDOWN)
3379 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3380 
3381 	/* readable? */
3382 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3383 		mask |= EPOLLIN | EPOLLRDNORM;
3384 	if (sk_is_readable(sk))
3385 		mask |= EPOLLIN | EPOLLRDNORM;
3386 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3387 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3388 		mask |= EPOLLPRI;
3389 #endif
3390 
3391 	/* Connection-based sockets need to check for termination and startup */
3392 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3393 	    state == TCP_CLOSE)
3394 		mask |= EPOLLHUP;
3395 
3396 	/*
3397 	 * We also report the socket as writable when the other side has
3398 	 * shut down the connection. This prevents stuck sockets.
3399 	 */
3400 	if (unix_writable(sk, state))
3401 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3402 
3403 	return mask;
3404 }
3405 
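/*
 * poll() for datagram sockets.  Writability additionally depends on the
 * connected peer: if the peer is not connected back to us and its
 * receive queue is full, the socket is not reported writable and we
 * register on the peer's wait queue for a wakeup.
 */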
3406 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3407 				    poll_table *wait)
3408 {
3409 	struct sock *sk = sock->sk, *other;
3410 	unsigned int writable;
3411 	unsigned char state;
3412 	__poll_t mask;
3413 	u8 shutdown;
3414 
3415 	sock_poll_wait(file, sock, wait);
3416 	mask = 0;
3417 	shutdown = READ_ONCE(sk->sk_shutdown);
3418 	state = READ_ONCE(sk->sk_state);
3419 
3420 	/* exceptional events? */
3421 	if (READ_ONCE(sk->sk_err) ||
3422 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3423 		mask |= EPOLLERR |
3424 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3425 
3426 	if (shutdown & RCV_SHUTDOWN)
3427 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3428 	if (shutdown == SHUTDOWN_MASK)
3429 		mask |= EPOLLHUP;
3430 
3431 	/* readable? */
3432 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3433 		mask |= EPOLLIN | EPOLLRDNORM;
3434 	if (sk_is_readable(sk))
3435 		mask |= EPOLLIN | EPOLLRDNORM;
3436 
3437 	/* Connection-based sockets need to check for termination and startup */
3438 	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3439 		mask |= EPOLLHUP;
3440 
3441 	/* No write status requested, avoid expensive OUT tests. */
3442 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3443 		return mask;
3444 
3445 	writable = unix_writable(sk, state);
3446 	if (writable) {
3447 		unix_state_lock(sk);
3448 
3449 		other = unix_peer(sk);
3450 		if (other && unix_peer(other) != sk &&
3451 		    unix_recvq_full_lockless(other) &&
3452 		    unix_dgram_peer_wake_me(sk, other))
3453 			writable = 0;
3454 
3455 		unix_state_unlock(sk);
3456 	}
3457 
3458 	if (writable)
3459 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3460 	else
3461 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3462 
3463 	return mask;
3464 }
3465 
3466 #ifdef CONFIG_PROC_FS
3467 
3468 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3469 
3470 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3471 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3472 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3473 
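/*
 * /proc/net/unix iteration: the seq_file position encodes a hash bucket
 * in the upper bits and a 1-based offset within that bucket in the
 * lower bits.  The bucket lock is held between start/next and stop.
 */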
3474 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3475 {
3476 	unsigned long offset = get_offset(*pos);
3477 	unsigned long bucket = get_bucket(*pos);
3478 	unsigned long count = 0;
3479 	struct sock *sk;
3480 
3481 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3482 	     sk; sk = sk_next(sk)) {
3483 		if (++count == offset)
3484 			break;
3485 	}
3486 
3487 	return sk;
3488 }
3489 
3490 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3491 {
3492 	unsigned long bucket = get_bucket(*pos);
3493 	struct net *net = seq_file_net(seq);
3494 	struct sock *sk;
3495 
3496 	while (bucket < UNIX_HASH_SIZE) {
3497 		spin_lock(&net->unx.table.locks[bucket]);
3498 
3499 		sk = unix_from_bucket(seq, pos);
3500 		if (sk)
3501 			return sk;
3502 
3503 		spin_unlock(&net->unx.table.locks[bucket]);
3504 
3505 		*pos = set_bucket_offset(++bucket, 1);
3506 	}
3507 
3508 	return NULL;
3509 }
3510 
3511 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3512 				  loff_t *pos)
3513 {
3514 	unsigned long bucket = get_bucket(*pos);
3515 
3516 	sk = sk_next(sk);
3517 	if (sk)
3518 		return sk;
3519 
3520 
3521 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3522 
3523 	*pos = set_bucket_offset(++bucket, 1);
3524 
3525 	return unix_get_first(seq, pos);
3526 }
3527 
3528 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3529 {
3530 	if (!*pos)
3531 		return SEQ_START_TOKEN;
3532 
3533 	return unix_get_first(seq, pos);
3534 }
3535 
3536 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3537 {
3538 	++*pos;
3539 
3540 	if (v == SEQ_START_TOKEN)
3541 		return unix_get_first(seq, pos);
3542 
3543 	return unix_get_next(seq, v, pos);
3544 }
3545 
3546 static void unix_seq_stop(struct seq_file *seq, void *v)
3547 {
3548 	struct sock *sk = v;
3549 
3550 	if (sk)
3551 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3552 }
3553 
3554 static int unix_seq_show(struct seq_file *seq, void *v)
3555 {
3556 
3557 	if (v == SEQ_START_TOKEN)
3558 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3559 			 "Inode Path\n");
3560 	else {
3561 		struct sock *s = v;
3562 		struct unix_sock *u = unix_sk(s);
3563 		unix_state_lock(s);
3564 
3565 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3566 			s,
3567 			refcount_read(&s->sk_refcnt),
3568 			0,
3569 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3570 			s->sk_type,
3571 			s->sk_socket ?
3572 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3573 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3574 			sock_i_ino(s));
3575 
3576 		if (u->addr) {	/* under a hash table lock here */
3577 			int i, len;
3578 			seq_putc(seq, ' ');
3579 
3580 			i = 0;
3581 			len = u->addr->len -
3582 				offsetof(struct sockaddr_un, sun_path);
3583 			if (u->addr->name->sun_path[0]) {
3584 				len--;
3585 			} else {
3586 				seq_putc(seq, '@');
3587 				i++;
3588 			}
3589 			for ( ; i < len; i++)
3590 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3591 					 '@');
3592 		}
3593 		unix_state_unlock(s);
3594 		seq_putc(seq, '\n');
3595 	}
3596 
3597 	return 0;
3598 }
3599 
3600 static const struct seq_operations unix_seq_ops = {
3601 	.start  = unix_seq_start,
3602 	.next   = unix_seq_next,
3603 	.stop   = unix_seq_stop,
3604 	.show   = unix_seq_show,
3605 };
3606 
3607 #ifdef CONFIG_BPF_SYSCALL
3608 struct bpf_unix_iter_state {
3609 	struct seq_net_private p;
3610 	unsigned int cur_sk;
3611 	unsigned int end_sk;
3612 	unsigned int max_sk;
3613 	struct sock **batch;
3614 	bool st_bucket_done;
3615 };
3616 
3617 struct bpf_iter__unix {
3618 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3619 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3620 	uid_t uid __aligned(8);
3621 };
3622 
3623 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3624 			      struct unix_sock *unix_sk, uid_t uid)
3625 {
3626 	struct bpf_iter__unix ctx;
3627 
3628 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3629 	ctx.meta = meta;
3630 	ctx.unix_sk = unix_sk;
3631 	ctx.uid = uid;
3632 	return bpf_iter_run_prog(prog, &ctx);
3633 }
3634 
3635 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3636 {
3638 	struct bpf_unix_iter_state *iter = seq->private;
3639 	unsigned int expected = 1;
3640 	struct sock *sk;
3641 
3642 	sock_hold(start_sk);
3643 	iter->batch[iter->end_sk++] = start_sk;
3644 
3645 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3646 		if (iter->end_sk < iter->max_sk) {
3647 			sock_hold(sk);
3648 			iter->batch[iter->end_sk++] = sk;
3649 		}
3650 
3651 		expected++;
3652 	}
3653 
3654 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3655 
3656 	return expected;
3657 }
3658 
3659 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3660 {
3661 	while (iter->cur_sk < iter->end_sk)
3662 		sock_put(iter->batch[iter->cur_sk++]);
3663 }
3664 
3665 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3666 				       unsigned int new_batch_sz)
3667 {
3668 	struct sock **new_batch;
3669 
3670 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3671 			     GFP_USER | __GFP_NOWARN);
3672 	if (!new_batch)
3673 		return -ENOMEM;
3674 
3675 	bpf_iter_unix_put_batch(iter);
3676 	kvfree(iter->batch);
3677 	iter->batch = new_batch;
3678 	iter->max_sk = new_batch_sz;
3679 
3680 	return 0;
3681 }
3682 
3683 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3684 					loff_t *pos)
3685 {
3686 	struct bpf_unix_iter_state *iter = seq->private;
3687 	unsigned int expected;
3688 	bool resized = false;
3689 	struct sock *sk;
3690 
3691 	if (iter->st_bucket_done)
3692 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3693 
3694 again:
3695 	/* Get a new batch */
3696 	iter->cur_sk = 0;
3697 	iter->end_sk = 0;
3698 
3699 	sk = unix_get_first(seq, pos);
3700 	if (!sk)
3701 		return NULL; /* Done */
3702 
3703 	expected = bpf_iter_unix_hold_batch(seq, sk);
3704 
3705 	if (iter->end_sk == expected) {
3706 		iter->st_bucket_done = true;
3707 		return sk;
3708 	}
3709 
3710 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3711 		resized = true;
3712 		goto again;
3713 	}
3714 
3715 	return sk;
3716 }
3717 
3718 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3719 {
3720 	if (!*pos)
3721 		return SEQ_START_TOKEN;
3722 
3723 	/* bpf iter does not support lseek, so it always
3724 	 * continues from where it was stop()-ped.
3725 	 */
3726 	return bpf_iter_unix_batch(seq, pos);
3727 }
3728 
3729 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3730 {
3731 	struct bpf_unix_iter_state *iter = seq->private;
3732 	struct sock *sk;
3733 
3734 	/* Whenever seq_next() is called, the sk at iter->cur_sk is
3735 	 * done with seq_show(), so release it and advance to the
3736 	 * next sk in the batch.
3737 	 */
3738 	if (iter->cur_sk < iter->end_sk)
3739 		sock_put(iter->batch[iter->cur_sk++]);
3740 
3741 	++*pos;
3742 
3743 	if (iter->cur_sk < iter->end_sk)
3744 		sk = iter->batch[iter->cur_sk];
3745 	else
3746 		sk = bpf_iter_unix_batch(seq, pos);
3747 
3748 	return sk;
3749 }
3750 
3751 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3752 {
3753 	struct bpf_iter_meta meta;
3754 	struct bpf_prog *prog;
3755 	struct sock *sk = v;
3756 	uid_t uid;
3757 	bool slow;
3758 	int ret;
3759 
3760 	if (v == SEQ_START_TOKEN)
3761 		return 0;
3762 
3763 	slow = lock_sock_fast(sk);
3764 
3765 	if (unlikely(sk_unhashed(sk))) {
3766 		ret = SEQ_SKIP;
3767 		goto unlock;
3768 	}
3769 
3770 	uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
3771 	meta.seq = seq;
3772 	prog = bpf_iter_get_info(&meta, false);
3773 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3774 unlock:
3775 	unlock_sock_fast(sk, slow);
3776 	return ret;
3777 }
3778 
3779 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3780 {
3781 	struct bpf_unix_iter_state *iter = seq->private;
3782 	struct bpf_iter_meta meta;
3783 	struct bpf_prog *prog;
3784 
3785 	if (!v) {
3786 		meta.seq = seq;
3787 		prog = bpf_iter_get_info(&meta, true);
3788 		if (prog)
3789 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3790 	}
3791 
3792 	if (iter->cur_sk < iter->end_sk)
3793 		bpf_iter_unix_put_batch(iter);
3794 }
3795 
3796 static const struct seq_operations bpf_iter_unix_seq_ops = {
3797 	.start	= bpf_iter_unix_seq_start,
3798 	.next	= bpf_iter_unix_seq_next,
3799 	.stop	= bpf_iter_unix_seq_stop,
3800 	.show	= bpf_iter_unix_seq_show,
3801 };
3802 #endif
3803 #endif
3804 
3805 static const struct net_proto_family unix_family_ops = {
3806 	.family = PF_UNIX,
3807 	.create = unix_create,
3808 	.owner	= THIS_MODULE,
3809 };
3810 
3811 
3812 static int __net_init unix_net_init(struct net *net)
3813 {
3814 	int i;
3815 
3816 	net->unx.sysctl_max_dgram_qlen = 10;
3817 	if (unix_sysctl_register(net))
3818 		goto out;
3819 
3820 #ifdef CONFIG_PROC_FS
3821 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3822 			     sizeof(struct seq_net_private)))
3823 		goto err_sysctl;
3824 #endif
3825 
3826 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3827 					      sizeof(spinlock_t), GFP_KERNEL);
3828 	if (!net->unx.table.locks)
3829 		goto err_proc;
3830 
3831 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3832 						sizeof(struct hlist_head),
3833 						GFP_KERNEL);
3834 	if (!net->unx.table.buckets)
3835 		goto free_locks;
3836 
3837 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3838 		spin_lock_init(&net->unx.table.locks[i]);
3839 		lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
3840 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3841 	}
3842 
3843 	return 0;
3844 
3845 free_locks:
3846 	kvfree(net->unx.table.locks);
3847 err_proc:
3848 #ifdef CONFIG_PROC_FS
3849 	remove_proc_entry("unix", net->proc_net);
3850 err_sysctl:
3851 #endif
3852 	unix_sysctl_unregister(net);
3853 out:
3854 	return -ENOMEM;
3855 }
3856 
3857 static void __net_exit unix_net_exit(struct net *net)
3858 {
3859 	kvfree(net->unx.table.buckets);
3860 	kvfree(net->unx.table.locks);
3861 	unix_sysctl_unregister(net);
3862 	remove_proc_entry("unix", net->proc_net);
3863 }
3864 
3865 static struct pernet_operations unix_net_ops = {
3866 	.init = unix_net_init,
3867 	.exit = unix_net_exit,
3868 };
3869 
3870 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3871 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3872 		     struct unix_sock *unix_sk, uid_t uid)
3873 
3874 #define INIT_BATCH_SZ 16
3875 
3876 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3877 {
3878 	struct bpf_unix_iter_state *iter = priv_data;
3879 	int err;
3880 
3881 	err = bpf_iter_init_seq_net(priv_data, aux);
3882 	if (err)
3883 		return err;
3884 
3885 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3886 	if (err) {
3887 		bpf_iter_fini_seq_net(priv_data);
3888 		return err;
3889 	}
3890 
3891 	return 0;
3892 }
3893 
3894 static void bpf_iter_fini_unix(void *priv_data)
3895 {
3896 	struct bpf_unix_iter_state *iter = priv_data;
3897 
3898 	bpf_iter_fini_seq_net(priv_data);
3899 	kvfree(iter->batch);
3900 }
3901 
3902 static const struct bpf_iter_seq_info unix_seq_info = {
3903 	.seq_ops		= &bpf_iter_unix_seq_ops,
3904 	.init_seq_private	= bpf_iter_init_unix,
3905 	.fini_seq_private	= bpf_iter_fini_unix,
3906 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3907 };
3908 
3909 static const struct bpf_func_proto *
3910 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3911 			     const struct bpf_prog *prog)
3912 {
3913 	switch (func_id) {
3914 	case BPF_FUNC_setsockopt:
3915 		return &bpf_sk_setsockopt_proto;
3916 	case BPF_FUNC_getsockopt:
3917 		return &bpf_sk_getsockopt_proto;
3918 	default:
3919 		return NULL;
3920 	}
3921 }
3922 
3923 static struct bpf_iter_reg unix_reg_info = {
3924 	.target			= "unix",
3925 	.ctx_arg_info_size	= 1,
3926 	.ctx_arg_info		= {
3927 		{ offsetof(struct bpf_iter__unix, unix_sk),
3928 		  PTR_TO_BTF_ID_OR_NULL },
3929 	},
3930 	.get_func_proto         = bpf_iter_unix_get_func_proto,
3931 	.seq_info		= &unix_seq_info,
3932 };
3933 
3934 static void __init bpf_iter_register(void)
3935 {
3936 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3937 	if (bpf_iter_reg_target(&unix_reg_info))
3938 		pr_warn("Warning: could not register bpf iterator unix\n");
3939 }
3940 #endif
3941 
3942 static int __init af_unix_init(void)
3943 {
3944 	int i, rc = -1;
3945 
3946 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3947 
3948 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3949 		spin_lock_init(&bsd_socket_locks[i]);
3950 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3951 	}
3952 
3953 	rc = proto_register(&unix_dgram_proto, 1);
3954 	if (rc != 0) {
3955 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3956 		goto out;
3957 	}
3958 
3959 	rc = proto_register(&unix_stream_proto, 1);
3960 	if (rc != 0) {
3961 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3962 		proto_unregister(&unix_dgram_proto);
3963 		goto out;
3964 	}
3965 
3966 	sock_register(&unix_family_ops);
3967 	register_pernet_subsys(&unix_net_ops);
3968 	unix_bpf_build_proto();
3969 
3970 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3971 	bpf_iter_register();
3972 #endif
3973 
3974 out:
3975 	return rc;
3976 }
3977 
3978 /* Later than subsys_initcall() because we depend on stuff initialised there */
3979 fs_initcall(af_unix_init);
3980