xref: /linux/net/unix/af_unix.c (revision 0e50474fa514822e9d990874e554bf8043a201d7)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko Eißfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by the above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge amount
34  *					of socks hashed (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skbs queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  starting with a NUL byte, so that this name space does not
75  *		  intersect with BSD names.
76  */
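/* Userspace sketch of the two binding styles described above.  Illustrative
 * only; the pathname and the abstract name are arbitrary examples, and a
 * socket can of course be bound only once:
 *
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	// Filesystem binding: NUL-terminated path, creates a socket inode.
 *	strcpy(a.sun_path, "/tmp/mysock");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract binding: sun_path[0] == '\0'; the name is the byte
 *	// sequence that follows and its length is given by the addrlen.
 *	memcpy(a.sun_path, "\0myname", 7);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 7);
 */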
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/bpf-cgroup.h>
81 #include <linux/btf_ids.h>
82 #include <linux/dcache.h>
83 #include <linux/errno.h>
84 #include <linux/fcntl.h>
85 #include <linux/file.h>
86 #include <linux/filter.h>
87 #include <linux/fs.h>
88 #include <linux/fs_struct.h>
89 #include <linux/init.h>
90 #include <linux/kernel.h>
91 #include <linux/mount.h>
92 #include <linux/namei.h>
93 #include <linux/net.h>
94 #include <linux/pidfs.h>
95 #include <linux/poll.h>
96 #include <linux/proc_fs.h>
97 #include <linux/sched/signal.h>
98 #include <linux/security.h>
99 #include <linux/seq_file.h>
100 #include <linux/skbuff.h>
101 #include <linux/slab.h>
102 #include <linux/socket.h>
103 #include <linux/splice.h>
104 #include <linux/string.h>
105 #include <linux/uaccess.h>
106 #include <net/af_unix.h>
107 #include <net/net_namespace.h>
108 #include <net/scm.h>
109 #include <net/tcp_states.h>
110 #include <uapi/linux/sockios.h>
111 #include <uapi/linux/termios.h>
112 
113 #include "af_unix.h"
114 
115 static atomic_long_t unix_nr_socks;
116 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
117 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
118 
119 /* SMP locking strategy:
120  *    The hash table is protected with a spinlock.
121  *    Each socket's state is protected by a separate spinlock.
122  */
123 #ifdef CONFIG_PROVE_LOCKING
124 #define cmp_ptr(l, r)	(((l) > (r)) - ((l) < (r)))
125 
126 static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
127 				  const struct lockdep_map *b)
128 {
129 	return cmp_ptr(a, b);
130 }
131 
132 static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
133 				  const struct lockdep_map *_b)
134 {
135 	const struct unix_sock *a, *b;
136 
137 	a = container_of(_a, struct unix_sock, lock.dep_map);
138 	b = container_of(_b, struct unix_sock, lock.dep_map);
139 
140 	if (a->sk.sk_state == TCP_LISTEN) {
141 		/* unix_stream_connect(): Before the 2nd unix_state_lock(),
142 		 *
143 		 *   1. a is TCP_LISTEN.
144 		 *   2. b is not a.
145 		 *   3. concurrent connect(b -> a) must fail.
146 		 *
147 		 * Except for 2. & 3., b's state can be any possible
148 		 * value due to concurrent connect() or listen().
149 		 *
150 		 * 2. is detected in debug_spin_lock_before(), and 3. cannot
151 		 * be expressed as lock_cmp_fn.
152 		 */
153 		switch (b->sk.sk_state) {
154 		case TCP_CLOSE:
155 		case TCP_ESTABLISHED:
156 		case TCP_LISTEN:
157 			return -1;
158 		default:
159 			/* Invalid case. */
160 			return 0;
161 		}
162 	}
163 
164 	/* Should never happen.  Just to be symmetric. */
165 	if (b->sk.sk_state == TCP_LISTEN) {
166 		switch (a->sk.sk_state) {
167 		case TCP_CLOSE:
168 		case TCP_ESTABLISHED:
169 			return 1;
170 		default:
171 			return 0;
172 		}
173 	}
174 
175 	/* unix_state_double_lock(): ascending address order. */
176 	return cmp_ptr(a, b);
177 }
178 
179 static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
180 				  const struct lockdep_map *_b)
181 {
182 	const struct sock *a, *b;
183 
184 	a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
185 	b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
186 
187 	/* unix_collect_skb(): listener -> embryo order. */
188 	if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
189 		return -1;
190 
191 	/* Should never happen.  Just to be symmetric. */
192 	if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
193 		return 1;
194 
195 	return 0;
196 }
197 #endif
198 
199 static unsigned int unix_unbound_hash(struct sock *sk)
200 {
201 	unsigned long hash = (unsigned long)sk;
202 
203 	hash ^= hash >> 16;
204 	hash ^= hash >> 8;
205 	hash ^= sk->sk_type;
206 
207 	return hash & UNIX_HASH_MOD;
208 }
209 
210 static unsigned int unix_bsd_hash(struct inode *i)
211 {
212 	return i->i_ino & UNIX_HASH_MOD;
213 }
214 
215 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
216 				       int addr_len, int type)
217 {
218 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
219 	unsigned int hash;
220 
221 	hash = (__force unsigned int)csum_fold(csum);
222 	hash ^= hash >> 8;
223 	hash ^= type;
224 
225 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
226 }
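/* Note on the hash-space layout implied by the three helpers above
 * (see af_unix.h for the actual constants):
 *
 *   - unix_unbound_hash() and unix_bsd_hash() both return values in
 *     [0, UNIX_HASH_MOD], i.e. the lower half of the hash space.
 *   - unix_abstract_hash() returns UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD),
 *     i.e. the upper half, so abstract names never share a bucket with
 *     unbound or filesystem sockets.
 *
 * This is also why bsd_socket_buckets[] above only needs UNIX_HASH_SIZE / 2
 * entries: only lower-half sk_hash values can ever index it.
 */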
227 
228 static void unix_table_double_lock(struct net *net,
229 				   unsigned int hash1, unsigned int hash2)
230 {
231 	if (hash1 == hash2) {
232 		spin_lock(&net->unx.table.locks[hash1]);
233 		return;
234 	}
235 
236 	if (hash1 > hash2)
237 		swap(hash1, hash2);
238 
239 	spin_lock(&net->unx.table.locks[hash1]);
240 	spin_lock(&net->unx.table.locks[hash2]);
241 }
242 
243 static void unix_table_double_unlock(struct net *net,
244 				     unsigned int hash1, unsigned int hash2)
245 {
246 	if (hash1 == hash2) {
247 		spin_unlock(&net->unx.table.locks[hash1]);
248 		return;
249 	}
250 
251 	spin_unlock(&net->unx.table.locks[hash1]);
252 	spin_unlock(&net->unx.table.locks[hash2]);
253 }
254 
255 #ifdef CONFIG_SECURITY_NETWORK
256 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
257 {
258 	UNIXCB(skb).secid = scm->secid;
259 }
260 
261 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
262 {
263 	scm->secid = UNIXCB(skb).secid;
264 }
265 
266 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
267 {
268 	return (scm->secid == UNIXCB(skb).secid);
269 }
270 #else
271 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
272 { }
273 
274 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
275 { }
276 
277 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
278 {
279 	return true;
280 }
281 #endif /* CONFIG_SECURITY_NETWORK */
282 
283 static inline int unix_may_send(struct sock *sk, struct sock *osk)
284 {
285 	return !unix_peer(osk) || unix_peer(osk) == sk;
286 }
287 
288 static inline int unix_recvq_full_lockless(const struct sock *sk)
289 {
290 	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
291 }
292 
293 struct sock *unix_peer_get(struct sock *s)
294 {
295 	struct sock *peer;
296 
297 	unix_state_lock(s);
298 	peer = unix_peer(s);
299 	if (peer)
300 		sock_hold(peer);
301 	unix_state_unlock(s);
302 	return peer;
303 }
304 EXPORT_SYMBOL_GPL(unix_peer_get);
305 
306 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
307 					     int addr_len)
308 {
309 	struct unix_address *addr;
310 
311 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
312 	if (!addr)
313 		return NULL;
314 
315 	refcount_set(&addr->refcnt, 1);
316 	addr->len = addr_len;
317 	memcpy(addr->name, sunaddr, addr_len);
318 
319 	return addr;
320 }
321 
322 static inline void unix_release_addr(struct unix_address *addr)
323 {
324 	if (refcount_dec_and_test(&addr->refcnt))
325 		kfree(addr);
326 }
327 
328 /*
329  *	Check unix socket name:
330  *		- should not be zero length.
331  *		- if it does not start with a NUL byte, it should be NUL terminated (FS object).
332  *		- if it starts with a NUL byte, it is an abstract name.
333  */
334 
335 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
336 {
337 	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
338 	    addr_len > sizeof(*sunaddr))
339 		return -EINVAL;
340 
341 	if (sunaddr->sun_family != AF_UNIX)
342 		return -EINVAL;
343 
344 	return 0;
345 }
346 
347 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
348 {
349 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
350 	short offset = offsetof(struct sockaddr_storage, __data);
351 
352 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
353 
354 	/* This may look like an off by one error but it is a bit more
355 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
356 	 * sun_path[108] doesn't as such exist.  However in kernel space
357 	 * we are guaranteed that it is a valid memory location in our
358 	 * kernel address buffer because syscall functions always pass
359 	 * a pointer of struct sockaddr_storage which has a bigger buffer
360 	 * than 108.  Also, we must terminate sun_path for strlen() in
361 	 * getname_kernel().
362 	 */
363 	addr->__data[addr_len - offset] = 0;
364 
365 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
366 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
367 	 * know the actual buffer.
368 	 */
369 	return strlen(addr->__data) + offset + 1;
370 }
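/* Illustrative example of the termination logic above, using a
 * hypothetical address rather than one from a particular caller:
 *
 *	// Userspace binds to "/tmp/x" without counting a trailing NUL:
 *	//   addr_len == offsetof(struct sockaddr_un, sun_path) + 6
 *	// unix_mkname_bsd() writes __data[6] = 0 and returns
 *	//   strlen("/tmp/x") + offset + 1 == offset + 7,
 *	// i.e. the address length including the terminating NUL, which is
 *	// what the filesystem bind/lookup paths below expect.
 */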
371 
372 static void __unix_remove_socket(struct sock *sk)
373 {
374 	sk_del_node_init(sk);
375 }
376 
377 static void __unix_insert_socket(struct net *net, struct sock *sk)
378 {
379 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
380 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
381 }
382 
383 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
384 				 struct unix_address *addr, unsigned int hash)
385 {
386 	__unix_remove_socket(sk);
387 	smp_store_release(&unix_sk(sk)->addr, addr);
388 
389 	sk->sk_hash = hash;
390 	__unix_insert_socket(net, sk);
391 }
392 
393 static void unix_remove_socket(struct net *net, struct sock *sk)
394 {
395 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
396 	__unix_remove_socket(sk);
397 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
398 }
399 
400 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
401 {
402 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
403 	__unix_insert_socket(net, sk);
404 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
405 }
406 
407 static void unix_insert_bsd_socket(struct sock *sk)
408 {
409 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
410 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
411 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
412 }
413 
414 static void unix_remove_bsd_socket(struct sock *sk)
415 {
416 	if (!hlist_unhashed(&sk->sk_bind_node)) {
417 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
418 		__sk_del_bind_node(sk);
419 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
420 
421 		sk_node_init(&sk->sk_bind_node);
422 	}
423 }
424 
425 static struct sock *__unix_find_socket_byname(struct net *net,
426 					      struct sockaddr_un *sunname,
427 					      int len, unsigned int hash)
428 {
429 	struct sock *s;
430 
431 	sk_for_each(s, &net->unx.table.buckets[hash]) {
432 		struct unix_sock *u = unix_sk(s);
433 
434 		if (u->addr->len == len &&
435 		    !memcmp(u->addr->name, sunname, len))
436 			return s;
437 	}
438 	return NULL;
439 }
440 
441 static inline struct sock *unix_find_socket_byname(struct net *net,
442 						   struct sockaddr_un *sunname,
443 						   int len, unsigned int hash)
444 {
445 	struct sock *s;
446 
447 	spin_lock(&net->unx.table.locks[hash]);
448 	s = __unix_find_socket_byname(net, sunname, len, hash);
449 	if (s)
450 		sock_hold(s);
451 	spin_unlock(&net->unx.table.locks[hash]);
452 	return s;
453 }
454 
455 static struct sock *unix_find_socket_byinode(struct inode *i)
456 {
457 	unsigned int hash = unix_bsd_hash(i);
458 	struct sock *s;
459 
460 	spin_lock(&bsd_socket_locks[hash]);
461 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
462 		struct dentry *dentry = unix_sk(s)->path.dentry;
463 
464 		if (dentry && d_backing_inode(dentry) == i) {
465 			sock_hold(s);
466 			spin_unlock(&bsd_socket_locks[hash]);
467 			return s;
468 		}
469 	}
470 	spin_unlock(&bsd_socket_locks[hash]);
471 	return NULL;
472 }
473 
474 /* Support code for asymmetrically connected dgram sockets
475  *
476  * If a datagram socket is connected to a socket not itself connected
477  * to the first socket (e.g., /dev/log), clients may only enqueue more
478  * messages if the present receive queue of the server socket is not
479  * "too large". This means there's a second writability condition
480  * poll and sendmsg need to test. The dgram recv code will do a wake
481  * up on the peer_wait wait queue of a socket upon reception of a
482  * datagram which needs to be propagated to sleeping would-be writers
483  * since these might not have sent anything so far. This can't be
484  * accomplished via poll_wait because the lifetime of the server
485  * socket might be less than that of its clients if these break their
486  * association with it or if the server socket is closed while clients
487  * are still connected to it and there's no way to inform "a polling
488  * implementation" that it should let go of a certain wait queue.
489  *
490  * In order to propagate a wake up, a wait_queue_entry_t of the client
491  * socket is enqueued on the peer_wait queue of the server socket
492  * whose wake function does a wake_up on the ordinary client socket
493  * wait queue. This connection is established whenever a write (or
494  * poll for write) hits the flow control condition and is broken when the
495  * association to the server socket is dissolved or after a wake up
496  * was relayed.
497  */
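/* A concrete (illustrative) run of the mechanism described above, for a
 * client C connected to a busy datagram server S such as /dev/log:
 *
 *	1. C's sendmsg()/poll() finds S's receive queue full and calls
 *	   unix_dgram_peer_wake_me(), which hooks C's peer_wake entry
 *	   onto S's peer_wait queue and returns 1 (C must wait).
 *	2. S's recvmsg() dequeues a datagram and wakes peer_wait.
 *	3. unix_dgram_peer_wake_relay() runs, unhooks C's entry and
 *	   relays the wakeup to C's own wait queue, so C's poll()/write()
 *	   can retry.
 */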
498 
499 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
500 				      void *key)
501 {
502 	struct unix_sock *u;
503 	wait_queue_head_t *u_sleep;
504 
505 	u = container_of(q, struct unix_sock, peer_wake);
506 
507 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
508 			    q);
509 	u->peer_wake.private = NULL;
510 
511 	/* relaying can only happen while the wq still exists */
512 	u_sleep = sk_sleep(&u->sk);
513 	if (u_sleep)
514 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
515 
516 	return 0;
517 }
518 
519 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
520 {
521 	struct unix_sock *u, *u_other;
522 	int rc;
523 
524 	u = unix_sk(sk);
525 	u_other = unix_sk(other);
526 	rc = 0;
527 	spin_lock(&u_other->peer_wait.lock);
528 
529 	if (!u->peer_wake.private) {
530 		u->peer_wake.private = other;
531 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
532 
533 		rc = 1;
534 	}
535 
536 	spin_unlock(&u_other->peer_wait.lock);
537 	return rc;
538 }
539 
540 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
541 					    struct sock *other)
542 {
543 	struct unix_sock *u, *u_other;
544 
545 	u = unix_sk(sk);
546 	u_other = unix_sk(other);
547 	spin_lock(&u_other->peer_wait.lock);
548 
549 	if (u->peer_wake.private == other) {
550 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
551 		u->peer_wake.private = NULL;
552 	}
553 
554 	spin_unlock(&u_other->peer_wait.lock);
555 }
556 
557 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
558 						   struct sock *other)
559 {
560 	unix_dgram_peer_wake_disconnect(sk, other);
561 	wake_up_interruptible_poll(sk_sleep(sk),
562 				   EPOLLOUT |
563 				   EPOLLWRNORM |
564 				   EPOLLWRBAND);
565 }
566 
567 /* preconditions:
568  *	- unix_peer(sk) == other
569  *	- association is stable
570  */
571 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
572 {
573 	int connected;
574 
575 	connected = unix_dgram_peer_wake_connect(sk, other);
576 
577 	/* If other is SOCK_DEAD, we want to make sure we signal
578 	 * POLLOUT, such that a subsequent write() can get a
579 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
580 	 * to other and it's full, we will hang waiting for POLLOUT.
581 	 */
582 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
583 		return 1;
584 
585 	if (connected)
586 		unix_dgram_peer_wake_disconnect(sk, other);
587 
588 	return 0;
589 }
590 
591 static int unix_writable(const struct sock *sk, unsigned char state)
592 {
593 	return state != TCP_LISTEN &&
594 		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
595 }
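/* The shift above means a unix socket counts as writable only while its
 * outstanding write memory (sk_wmem_alloc) is at most a quarter of
 * sk_sndbuf.  Illustrative numbers: with sk_sndbuf == 64 KB the socket
 * stops reporting EPOLLOUT once more than 16 KB of skbs are in flight.
 */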
596 
597 static void unix_write_space(struct sock *sk)
598 {
599 	struct socket_wq *wq;
600 
601 	rcu_read_lock();
602 	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
603 		wq = rcu_dereference(sk->sk_wq);
604 		if (skwq_has_sleeper(wq))
605 			wake_up_interruptible_sync_poll(&wq->wait,
606 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
607 		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
608 	}
609 	rcu_read_unlock();
610 }
611 
612 /* When a dgram socket disconnects (or changes its peer), we clear its receive
613  * queue of packets that arrived from the previous peer. First, this allows
614  * flow control based only on wmem_alloc; second, an sk connected to a peer
615  * may receive messages only from that peer. */
616 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
617 {
618 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
619 		skb_queue_purge_reason(&sk->sk_receive_queue,
620 				       SKB_DROP_REASON_UNIX_DISCONNECT);
621 
622 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
623 
624 		/* If one link of a bidirectional dgram pipe is disconnected,
625 		 * we signal an error. Messages are lost. Do not do this
626 		 * when the peer was not connected to us.
627 		 */
628 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
629 			WRITE_ONCE(other->sk_err, ECONNRESET);
630 			sk_error_report(other);
631 		}
632 	}
633 }
634 
635 static void unix_sock_destructor(struct sock *sk)
636 {
637 	struct unix_sock *u = unix_sk(sk);
638 
639 	skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);
640 
641 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
642 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
643 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
644 	if (!sock_flag(sk, SOCK_DEAD)) {
645 		pr_info("Attempt to release alive unix socket: %p\n", sk);
646 		return;
647 	}
648 
649 	if (u->addr)
650 		unix_release_addr(u->addr);
651 
652 	atomic_long_dec(&unix_nr_socks);
653 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
654 #ifdef UNIX_REFCNT_DEBUG
655 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
656 		atomic_long_read(&unix_nr_socks));
657 #endif
658 }
659 
660 static unsigned int unix_skb_len(const struct sk_buff *skb)
661 {
662 	return skb->len - UNIXCB(skb).consumed;
663 }
664 
665 static void unix_release_sock(struct sock *sk, int embrion)
666 {
667 	struct unix_sock *u = unix_sk(sk);
668 	struct sock *skpair;
669 	struct sk_buff *skb;
670 	struct path path;
671 	int state;
672 
673 	unix_remove_socket(sock_net(sk), sk);
674 	unix_remove_bsd_socket(sk);
675 
676 	/* Clear state */
677 	unix_state_lock(sk);
678 	sock_orphan(sk);
679 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
680 	path	     = u->path;
681 	u->path.dentry = NULL;
682 	u->path.mnt = NULL;
683 	state = sk->sk_state;
684 	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
685 
686 	skpair = unix_peer(sk);
687 	unix_peer(sk) = NULL;
688 
689 	unix_state_unlock(sk);
690 
691 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
692 	u->oob_skb = NULL;
693 #endif
694 
695 	wake_up_interruptible_all(&u->peer_wait);
696 
697 	if (skpair != NULL) {
698 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
699 			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
700 
701 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
702 			if (skb && !unix_skb_len(skb))
703 				skb = skb_peek_next(skb, &sk->sk_receive_queue);
704 #endif
705 			unix_state_lock(skpair);
706 			/* No more writes */
707 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
708 			if (skb || embrion)
709 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
710 			unix_state_unlock(skpair);
711 			skpair->sk_state_change(skpair);
712 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
713 		}
714 
715 		unix_dgram_peer_wake_disconnect(sk, skpair);
716 		sock_put(skpair); /* It may now die */
717 	}
718 
719 	/* Try to flush out this socket. Throw out buffers at least */
720 
721 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
722 		if (state == TCP_LISTEN)
723 			unix_release_sock(skb->sk, 1);
724 
725 		/* passed fds are erased in the kfree_skb hook */
726 		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
727 	}
728 
729 	if (path.dentry)
730 		path_put(&path);
731 
732 	sock_put(sk);
733 
734 	/* ---- Socket is dead now and most probably destroyed ---- */
735 
736 	if (READ_ONCE(unix_tot_inflight))
737 		unix_gc();		/* Garbage collect fds */
738 }
739 
740 struct unix_peercred {
741 	struct pid *peer_pid;
742 	const struct cred *peer_cred;
743 };
744 
745 static inline int prepare_peercred(struct unix_peercred *peercred)
746 {
747 	struct pid *pid;
748 	int err;
749 
750 	pid = task_tgid(current);
751 	err = pidfs_register_pid(pid);
752 	if (likely(!err)) {
753 		peercred->peer_pid = get_pid(pid);
754 		peercred->peer_cred = get_current_cred();
755 	}
756 	return err;
757 }
758 
759 static void drop_peercred(struct unix_peercred *peercred)
760 {
761 	const struct cred *cred = NULL;
762 	struct pid *pid = NULL;
763 
764 	might_sleep();
765 
766 	swap(peercred->peer_pid, pid);
767 	swap(peercred->peer_cred, cred);
768 
769 	put_pid(pid);
770 	put_cred(cred);
771 }
772 
773 static inline void init_peercred(struct sock *sk,
774 				 const struct unix_peercred *peercred)
775 {
776 	sk->sk_peer_pid = peercred->peer_pid;
777 	sk->sk_peer_cred = peercred->peer_cred;
778 }
779 
780 static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
781 {
782 	const struct cred *old_cred;
783 	struct pid *old_pid;
784 
785 	spin_lock(&sk->sk_peer_lock);
786 	old_pid = sk->sk_peer_pid;
787 	old_cred = sk->sk_peer_cred;
788 	init_peercred(sk, peercred);
789 	spin_unlock(&sk->sk_peer_lock);
790 
791 	peercred->peer_pid = old_pid;
792 	peercred->peer_cred = old_cred;
793 }
794 
795 static void copy_peercred(struct sock *sk, struct sock *peersk)
796 {
797 	lockdep_assert_held(&unix_sk(peersk)->lock);
798 
799 	spin_lock(&sk->sk_peer_lock);
800 	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
801 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
802 	spin_unlock(&sk->sk_peer_lock);
803 }
804 
805 static bool unix_may_passcred(const struct sock *sk)
806 {
807 	return sk->sk_scm_credentials || sk->sk_scm_pidfd;
808 }
809 
810 static int unix_listen(struct socket *sock, int backlog)
811 {
812 	int err;
813 	struct sock *sk = sock->sk;
814 	struct unix_sock *u = unix_sk(sk);
815 	struct unix_peercred peercred = {};
816 
817 	err = -EOPNOTSUPP;
818 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
819 		goto out;	/* Only stream/seqpacket sockets accept */
820 	err = -EINVAL;
821 	if (!READ_ONCE(u->addr))
822 		goto out;	/* No listens on an unbound socket */
823 	err = prepare_peercred(&peercred);
824 	if (err)
825 		goto out;
826 	unix_state_lock(sk);
827 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
828 		goto out_unlock;
829 	if (backlog > sk->sk_max_ack_backlog)
830 		wake_up_interruptible_all(&u->peer_wait);
831 	sk->sk_max_ack_backlog	= backlog;
832 	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
833 
834 	/* set credentials so connect can copy them */
835 	update_peercred(sk, &peercred);
836 	err = 0;
837 
838 out_unlock:
839 	unix_state_unlock(sk);
840 	drop_peercred(&peercred);
841 out:
842 	return err;
843 }
844 
845 static int unix_release(struct socket *);
846 static int unix_bind(struct socket *, struct sockaddr_unsized *, int);
847 static int unix_stream_connect(struct socket *, struct sockaddr *,
848 			       int addr_len, int flags);
849 static int unix_socketpair(struct socket *, struct socket *);
850 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
851 static int unix_getname(struct socket *, struct sockaddr *, int);
852 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
853 static __poll_t unix_dgram_poll(struct file *, struct socket *,
854 				    poll_table *);
855 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
856 #ifdef CONFIG_COMPAT
857 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
858 #endif
859 static int unix_shutdown(struct socket *, int);
860 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
861 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
862 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
863 				       struct pipe_inode_info *, size_t size,
864 				       unsigned int flags);
865 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
866 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
867 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
868 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
869 static int unix_dgram_connect(struct socket *, struct sockaddr *,
870 			      int, int);
871 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
872 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
873 				  int);
874 
875 #ifdef CONFIG_PROC_FS
876 static int unix_count_nr_fds(struct sock *sk)
877 {
878 	struct sk_buff *skb;
879 	struct unix_sock *u;
880 	int nr_fds = 0;
881 
882 	spin_lock(&sk->sk_receive_queue.lock);
883 	skb = skb_peek(&sk->sk_receive_queue);
884 	while (skb) {
885 		u = unix_sk(skb->sk);
886 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
887 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
888 	}
889 	spin_unlock(&sk->sk_receive_queue.lock);
890 
891 	return nr_fds;
892 }
893 
894 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
895 {
896 	struct sock *sk = sock->sk;
897 	unsigned char s_state;
898 	struct unix_sock *u;
899 	int nr_fds = 0;
900 
901 	if (sk) {
902 		s_state = READ_ONCE(sk->sk_state);
903 		u = unix_sk(sk);
904 
905 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
906 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
907 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
908 		 */
909 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
910 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
911 		else if (s_state == TCP_LISTEN)
912 			nr_fds = unix_count_nr_fds(sk);
913 
914 		seq_printf(m, "scm_fds: %u\n", nr_fds);
915 	}
916 }
917 #else
918 #define unix_show_fdinfo NULL
919 #endif
920 
921 static bool unix_custom_sockopt(int optname)
922 {
923 	switch (optname) {
924 	case SO_INQ:
925 		return true;
926 	default:
927 		return false;
928 	}
929 }
930 
931 static int unix_setsockopt(struct socket *sock, int level, int optname,
932 			   sockptr_t optval, unsigned int optlen)
933 {
934 	struct unix_sock *u = unix_sk(sock->sk);
935 	struct sock *sk = sock->sk;
936 	int val;
937 
938 	if (level != SOL_SOCKET)
939 		return -EOPNOTSUPP;
940 
941 	if (!unix_custom_sockopt(optname))
942 		return sock_setsockopt(sock, level, optname, optval, optlen);
943 
944 	if (optlen != sizeof(int))
945 		return -EINVAL;
946 
947 	if (copy_from_sockptr(&val, optval, sizeof(val)))
948 		return -EFAULT;
949 
950 	switch (optname) {
951 	case SO_INQ:
952 		if (sk->sk_type != SOCK_STREAM)
953 			return -EINVAL;
954 
955 		if (val > 1 || val < 0)
956 			return -EINVAL;
957 
958 		WRITE_ONCE(u->recvmsg_inq, val);
959 		break;
960 	default:
961 		return -ENOPROTOOPT;
962 	}
963 
964 	return 0;
965 }
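/* Userspace usage sketch for the SO_INQ handling above (SOCK_STREAM only).
 * The control-message name mentioned below is an assumption; check the
 * uapi headers for the authoritative definition.
 *
 *	int one = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_INQ, &one, sizeof(one));
 *	// Subsequent recvmsg() calls on fd then carry a control message
 *	// (SCM_INQ) with the number of bytes still queued on the socket,
 *	// mirroring the recvmsg_inq flag set here.
 */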
966 
967 static const struct proto_ops unix_stream_ops = {
968 	.family =	PF_UNIX,
969 	.owner =	THIS_MODULE,
970 	.release =	unix_release,
971 	.bind =		unix_bind,
972 	.connect =	unix_stream_connect,
973 	.socketpair =	unix_socketpair,
974 	.accept =	unix_accept,
975 	.getname =	unix_getname,
976 	.poll =		unix_poll,
977 	.ioctl =	unix_ioctl,
978 #ifdef CONFIG_COMPAT
979 	.compat_ioctl =	unix_compat_ioctl,
980 #endif
981 	.listen =	unix_listen,
982 	.shutdown =	unix_shutdown,
983 	.setsockopt =	unix_setsockopt,
984 	.sendmsg =	unix_stream_sendmsg,
985 	.recvmsg =	unix_stream_recvmsg,
986 	.read_skb =	unix_stream_read_skb,
987 	.mmap =		sock_no_mmap,
988 	.splice_read =	unix_stream_splice_read,
989 	.set_peek_off =	sk_set_peek_off,
990 	.show_fdinfo =	unix_show_fdinfo,
991 };
992 
993 static const struct proto_ops unix_dgram_ops = {
994 	.family =	PF_UNIX,
995 	.owner =	THIS_MODULE,
996 	.release =	unix_release,
997 	.bind =		unix_bind,
998 	.connect =	unix_dgram_connect,
999 	.socketpair =	unix_socketpair,
1000 	.accept =	sock_no_accept,
1001 	.getname =	unix_getname,
1002 	.poll =		unix_dgram_poll,
1003 	.ioctl =	unix_ioctl,
1004 #ifdef CONFIG_COMPAT
1005 	.compat_ioctl =	unix_compat_ioctl,
1006 #endif
1007 	.listen =	sock_no_listen,
1008 	.shutdown =	unix_shutdown,
1009 	.sendmsg =	unix_dgram_sendmsg,
1010 	.read_skb =	unix_read_skb,
1011 	.recvmsg =	unix_dgram_recvmsg,
1012 	.mmap =		sock_no_mmap,
1013 	.set_peek_off =	sk_set_peek_off,
1014 	.show_fdinfo =	unix_show_fdinfo,
1015 };
1016 
1017 static const struct proto_ops unix_seqpacket_ops = {
1018 	.family =	PF_UNIX,
1019 	.owner =	THIS_MODULE,
1020 	.release =	unix_release,
1021 	.bind =		unix_bind,
1022 	.connect =	unix_stream_connect,
1023 	.socketpair =	unix_socketpair,
1024 	.accept =	unix_accept,
1025 	.getname =	unix_getname,
1026 	.poll =		unix_dgram_poll,
1027 	.ioctl =	unix_ioctl,
1028 #ifdef CONFIG_COMPAT
1029 	.compat_ioctl =	unix_compat_ioctl,
1030 #endif
1031 	.listen =	unix_listen,
1032 	.shutdown =	unix_shutdown,
1033 	.sendmsg =	unix_seqpacket_sendmsg,
1034 	.recvmsg =	unix_seqpacket_recvmsg,
1035 	.mmap =		sock_no_mmap,
1036 	.set_peek_off =	sk_set_peek_off,
1037 	.show_fdinfo =	unix_show_fdinfo,
1038 };
1039 
1040 static void unix_close(struct sock *sk, long timeout)
1041 {
1042 	/* Nothing to do here, unix socket does not need a ->close().
1043 	 * This is merely for sockmap.
1044 	 */
1045 }
1046 
1047 static bool unix_bpf_bypass_getsockopt(int level, int optname)
1048 {
1049 	if (level == SOL_SOCKET) {
1050 		switch (optname) {
1051 		case SO_PEERPIDFD:
1052 			return true;
1053 		default:
1054 			return false;
1055 		}
1056 	}
1057 
1058 	return false;
1059 }
1060 
1061 struct proto unix_dgram_proto = {
1062 	.name			= "UNIX",
1063 	.owner			= THIS_MODULE,
1064 	.obj_size		= sizeof(struct unix_sock),
1065 	.close			= unix_close,
1066 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1067 #ifdef CONFIG_BPF_SYSCALL
1068 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
1069 #endif
1070 };
1071 
1072 struct proto unix_stream_proto = {
1073 	.name			= "UNIX-STREAM",
1074 	.owner			= THIS_MODULE,
1075 	.obj_size		= sizeof(struct unix_sock),
1076 	.close			= unix_close,
1077 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1078 #ifdef CONFIG_BPF_SYSCALL
1079 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
1080 #endif
1081 };
1082 
1083 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1084 {
1085 	struct unix_sock *u;
1086 	struct sock *sk;
1087 	int err;
1088 
1089 	atomic_long_inc(&unix_nr_socks);
1090 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
1091 		err = -ENFILE;
1092 		goto err;
1093 	}
1094 
1095 	if (type == SOCK_STREAM)
1096 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
1097 	else /* dgram and seqpacket */
1098 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
1099 
1100 	if (!sk) {
1101 		err = -ENOMEM;
1102 		goto err;
1103 	}
1104 
1105 	sock_init_data(sock, sk);
1106 
1107 	sk->sk_scm_rights	= 1;
1108 	sk->sk_hash		= unix_unbound_hash(sk);
1109 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
1110 	sk->sk_write_space	= unix_write_space;
1111 	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
1112 	sk->sk_destruct		= unix_sock_destructor;
1113 	lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
1114 
1115 	u = unix_sk(sk);
1116 	u->listener = NULL;
1117 	u->vertex = NULL;
1118 	u->path.dentry = NULL;
1119 	u->path.mnt = NULL;
1120 	spin_lock_init(&u->lock);
1121 	lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
1122 	mutex_init(&u->iolock); /* single task reading lock */
1123 	mutex_init(&u->bindlock); /* single task binding lock */
1124 	init_waitqueue_head(&u->peer_wait);
1125 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1126 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1127 	unix_insert_unbound_socket(net, sk);
1128 
1129 	sock_prot_inuse_add(net, sk->sk_prot, 1);
1130 
1131 	return sk;
1132 
1133 err:
1134 	atomic_long_dec(&unix_nr_socks);
1135 	return ERR_PTR(err);
1136 }
1137 
1138 static int unix_create(struct net *net, struct socket *sock, int protocol,
1139 		       int kern)
1140 {
1141 	struct sock *sk;
1142 
1143 	if (protocol && protocol != PF_UNIX)
1144 		return -EPROTONOSUPPORT;
1145 
1146 	sock->state = SS_UNCONNECTED;
1147 
1148 	switch (sock->type) {
1149 	case SOCK_STREAM:
1150 		set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
1151 		sock->ops = &unix_stream_ops;
1152 		break;
1153 		/*
1154 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1155 		 *	nothing uses it.
1156 		 */
1157 	case SOCK_RAW:
1158 		sock->type = SOCK_DGRAM;
1159 		fallthrough;
1160 	case SOCK_DGRAM:
1161 		sock->ops = &unix_dgram_ops;
1162 		break;
1163 	case SOCK_SEQPACKET:
1164 		sock->ops = &unix_seqpacket_ops;
1165 		break;
1166 	default:
1167 		return -ESOCKTNOSUPPORT;
1168 	}
1169 
1170 	sk = unix_create1(net, sock, kern, sock->type);
1171 	if (IS_ERR(sk))
1172 		return PTR_ERR(sk);
1173 
1174 	return 0;
1175 }
1176 
1177 static int unix_release(struct socket *sock)
1178 {
1179 	struct sock *sk = sock->sk;
1180 
1181 	if (!sk)
1182 		return 0;
1183 
1184 	sk->sk_prot->close(sk, 0);
1185 	unix_release_sock(sk, 0);
1186 	sock->sk = NULL;
1187 
1188 	return 0;
1189 }
1190 
1191 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1192 				  int type, int flags)
1193 {
1194 	struct inode *inode;
1195 	struct path path;
1196 	struct sock *sk;
1197 	int err;
1198 
1199 	unix_mkname_bsd(sunaddr, addr_len);
1200 
1201 	if (flags & SOCK_COREDUMP) {
1202 		const struct cred *cred;
1203 		struct cred *kcred;
1204 		struct path root;
1205 
1206 		kcred = prepare_kernel_cred(&init_task);
1207 		if (!kcred) {
1208 			err = -ENOMEM;
1209 			goto fail;
1210 		}
1211 
1212 		task_lock(&init_task);
1213 		get_fs_root(init_task.fs, &root);
1214 		task_unlock(&init_task);
1215 
1216 		cred = override_creds(kcred);
1217 		err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
1218 				      LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
1219 				      LOOKUP_NO_MAGICLINKS, &path);
1220 		put_cred(revert_creds(cred));
1221 		path_put(&root);
1222 		if (err)
1223 			goto fail;
1224 	} else {
1225 		err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1226 		if (err)
1227 			goto fail;
1228 
1229 		err = path_permission(&path, MAY_WRITE);
1230 		if (err)
1231 			goto path_put;
1232 	}
1233 
1234 	err = -ECONNREFUSED;
1235 	inode = d_backing_inode(path.dentry);
1236 	if (!S_ISSOCK(inode->i_mode))
1237 		goto path_put;
1238 
1239 	sk = unix_find_socket_byinode(inode);
1240 	if (!sk)
1241 		goto path_put;
1242 
1243 	err = -EPROTOTYPE;
1244 	if (sk->sk_type == type)
1245 		touch_atime(&path);
1246 	else
1247 		goto sock_put;
1248 
1249 	path_put(&path);
1250 
1251 	return sk;
1252 
1253 sock_put:
1254 	sock_put(sk);
1255 path_put:
1256 	path_put(&path);
1257 fail:
1258 	return ERR_PTR(err);
1259 }
1260 
1261 static struct sock *unix_find_abstract(struct net *net,
1262 				       struct sockaddr_un *sunaddr,
1263 				       int addr_len, int type)
1264 {
1265 	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1266 	struct dentry *dentry;
1267 	struct sock *sk;
1268 
1269 	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1270 	if (!sk)
1271 		return ERR_PTR(-ECONNREFUSED);
1272 
1273 	dentry = unix_sk(sk)->path.dentry;
1274 	if (dentry)
1275 		touch_atime(&unix_sk(sk)->path);
1276 
1277 	return sk;
1278 }
1279 
1280 static struct sock *unix_find_other(struct net *net,
1281 				    struct sockaddr_un *sunaddr,
1282 				    int addr_len, int type, int flags)
1283 {
1284 	struct sock *sk;
1285 
1286 	if (sunaddr->sun_path[0])
1287 		sk = unix_find_bsd(sunaddr, addr_len, type, flags);
1288 	else
1289 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1290 
1291 	return sk;
1292 }
1293 
1294 static int unix_autobind(struct sock *sk)
1295 {
1296 	struct unix_sock *u = unix_sk(sk);
1297 	unsigned int new_hash, old_hash;
1298 	struct net *net = sock_net(sk);
1299 	struct unix_address *addr;
1300 	u32 lastnum, ordernum;
1301 	int err;
1302 
1303 	err = mutex_lock_interruptible(&u->bindlock);
1304 	if (err)
1305 		return err;
1306 
1307 	if (u->addr)
1308 		goto out;
1309 
1310 	err = -ENOMEM;
1311 	addr = kzalloc(sizeof(*addr) +
1312 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1313 	if (!addr)
1314 		goto out;
1315 
1316 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1317 	addr->name->sun_family = AF_UNIX;
1318 	refcount_set(&addr->refcnt, 1);
1319 
1320 	old_hash = sk->sk_hash;
1321 	ordernum = get_random_u32();
1322 	lastnum = ordernum & 0xFFFFF;
1323 retry:
1324 	ordernum = (ordernum + 1) & 0xFFFFF;
1325 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1326 
1327 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1328 	unix_table_double_lock(net, old_hash, new_hash);
1329 
1330 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1331 		unix_table_double_unlock(net, old_hash, new_hash);
1332 
1333 		/* __unix_find_socket_byname() may take a long time if many names
1334 		 * are already in use.
1335 		 */
1336 		cond_resched();
1337 
1338 		if (ordernum == lastnum) {
1339 			/* Give up if all names seem to be in use. */
1340 			err = -ENOSPC;
1341 			unix_release_addr(addr);
1342 			goto out;
1343 		}
1344 
1345 		goto retry;
1346 	}
1347 
1348 	__unix_set_addr_hash(net, sk, addr, new_hash);
1349 	unix_table_double_unlock(net, old_hash, new_hash);
1350 	err = 0;
1351 
1352 out:	mutex_unlock(&u->bindlock);
1353 	return err;
1354 }
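/* Resulting autobind address format (derived from the code above): an
 * abstract name of exactly six bytes, a leading NUL followed by five
 * lowercase hex digits, e.g. "\0" "00a3f" for ordernum 0xa3f.  Up to
 * 0x100000 candidate names are tried before giving up with -ENOSPC.
 */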
1355 
1356 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1357 			 int addr_len)
1358 {
1359 	umode_t mode = S_IFSOCK |
1360 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1361 	struct unix_sock *u = unix_sk(sk);
1362 	unsigned int new_hash, old_hash;
1363 	struct net *net = sock_net(sk);
1364 	struct mnt_idmap *idmap;
1365 	struct unix_address *addr;
1366 	struct dentry *dentry;
1367 	struct path parent;
1368 	int err;
1369 
1370 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1371 	addr = unix_create_addr(sunaddr, addr_len);
1372 	if (!addr)
1373 		return -ENOMEM;
1374 
1375 	/*
1376 	 * Get the parent directory, calculate the hash for last
1377 	 * component.
1378 	 */
1379 	dentry = start_creating_path(AT_FDCWD, addr->name->sun_path, &parent, 0);
1380 	if (IS_ERR(dentry)) {
1381 		err = PTR_ERR(dentry);
1382 		goto out;
1383 	}
1384 
1385 	/*
1386 	 * All right, let's create it.
1387 	 */
1388 	idmap = mnt_idmap(parent.mnt);
1389 	err = security_path_mknod(&parent, dentry, mode, 0);
1390 	if (!err)
1391 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1392 	if (err)
1393 		goto out_path;
1394 	err = mutex_lock_interruptible(&u->bindlock);
1395 	if (err)
1396 		goto out_unlink;
1397 	if (u->addr)
1398 		goto out_unlock;
1399 
1400 	old_hash = sk->sk_hash;
1401 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1402 	unix_table_double_lock(net, old_hash, new_hash);
1403 	u->path.mnt = mntget(parent.mnt);
1404 	u->path.dentry = dget(dentry);
1405 	__unix_set_addr_hash(net, sk, addr, new_hash);
1406 	unix_table_double_unlock(net, old_hash, new_hash);
1407 	unix_insert_bsd_socket(sk);
1408 	mutex_unlock(&u->bindlock);
1409 	end_creating_path(&parent, dentry);
1410 	return 0;
1411 
1412 out_unlock:
1413 	mutex_unlock(&u->bindlock);
1414 	err = -EINVAL;
1415 out_unlink:
1416 	/* failed after successful mknod?  unlink what we'd created... */
1417 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1418 out_path:
1419 	end_creating_path(&parent, dentry);
1420 out:
1421 	unix_release_addr(addr);
1422 	return err == -EEXIST ? -EADDRINUSE : err;
1423 }
1424 
1425 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1426 			      int addr_len)
1427 {
1428 	struct unix_sock *u = unix_sk(sk);
1429 	unsigned int new_hash, old_hash;
1430 	struct net *net = sock_net(sk);
1431 	struct unix_address *addr;
1432 	int err;
1433 
1434 	addr = unix_create_addr(sunaddr, addr_len);
1435 	if (!addr)
1436 		return -ENOMEM;
1437 
1438 	err = mutex_lock_interruptible(&u->bindlock);
1439 	if (err)
1440 		goto out;
1441 
1442 	if (u->addr) {
1443 		err = -EINVAL;
1444 		goto out_mutex;
1445 	}
1446 
1447 	old_hash = sk->sk_hash;
1448 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1449 	unix_table_double_lock(net, old_hash, new_hash);
1450 
1451 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1452 		goto out_spin;
1453 
1454 	__unix_set_addr_hash(net, sk, addr, new_hash);
1455 	unix_table_double_unlock(net, old_hash, new_hash);
1456 	mutex_unlock(&u->bindlock);
1457 	return 0;
1458 
1459 out_spin:
1460 	unix_table_double_unlock(net, old_hash, new_hash);
1461 	err = -EADDRINUSE;
1462 out_mutex:
1463 	mutex_unlock(&u->bindlock);
1464 out:
1465 	unix_release_addr(addr);
1466 	return err;
1467 }
1468 
1469 static int unix_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
1470 {
1471 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1472 	struct sock *sk = sock->sk;
1473 	int err;
1474 
1475 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1476 	    sunaddr->sun_family == AF_UNIX)
1477 		return unix_autobind(sk);
1478 
1479 	err = unix_validate_addr(sunaddr, addr_len);
1480 	if (err)
1481 		return err;
1482 
1483 	if (sunaddr->sun_path[0])
1484 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1485 	else
1486 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1487 
1488 	return err;
1489 }
1490 
1491 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1492 {
1493 	if (unlikely(sk1 == sk2) || !sk2) {
1494 		unix_state_lock(sk1);
1495 		return;
1496 	}
1497 
1498 	if (sk1 > sk2)
1499 		swap(sk1, sk2);
1500 
1501 	unix_state_lock(sk1);
1502 	unix_state_lock(sk2);
1503 }
1504 
1505 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1506 {
1507 	if (unlikely(sk1 == sk2) || !sk2) {
1508 		unix_state_unlock(sk1);
1509 		return;
1510 	}
1511 	unix_state_unlock(sk1);
1512 	unix_state_unlock(sk2);
1513 }
1514 
1515 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1516 			      int alen, int flags)
1517 {
1518 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1519 	struct sock *sk = sock->sk;
1520 	struct sock *other;
1521 	int err;
1522 
1523 	err = -EINVAL;
1524 	if (alen < offsetofend(struct sockaddr, sa_family))
1525 		goto out;
1526 
1527 	if (addr->sa_family != AF_UNSPEC) {
1528 		err = unix_validate_addr(sunaddr, alen);
1529 		if (err)
1530 			goto out;
1531 
1532 		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1533 		if (err)
1534 			goto out;
1535 
1536 		if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
1537 			err = unix_autobind(sk);
1538 			if (err)
1539 				goto out;
1540 		}
1541 
1542 restart:
1543 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
1544 		if (IS_ERR(other)) {
1545 			err = PTR_ERR(other);
1546 			goto out;
1547 		}
1548 
1549 		unix_state_double_lock(sk, other);
1550 
1551 		/* Apparently VFS overslept socket death. Retry. */
1552 		if (sock_flag(other, SOCK_DEAD)) {
1553 			unix_state_double_unlock(sk, other);
1554 			sock_put(other);
1555 			goto restart;
1556 		}
1557 
1558 		err = -EPERM;
1559 		if (!unix_may_send(sk, other))
1560 			goto out_unlock;
1561 
1562 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1563 		if (err)
1564 			goto out_unlock;
1565 
1566 		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1567 		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1568 	} else {
1569 		/*
1570 		 *	1003.1g breaking connected state with AF_UNSPEC
1571 		 */
1572 		other = NULL;
1573 		unix_state_double_lock(sk, other);
1574 	}
1575 
1576 	/*
1577 	 * If it was connected, reconnect.
1578 	 */
1579 	if (unix_peer(sk)) {
1580 		struct sock *old_peer = unix_peer(sk);
1581 
1582 		unix_peer(sk) = other;
1583 		if (!other)
1584 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1585 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1586 
1587 		unix_state_double_unlock(sk, other);
1588 
1589 		if (other != old_peer) {
1590 			unix_dgram_disconnected(sk, old_peer);
1591 
1592 			unix_state_lock(old_peer);
1593 			if (!unix_peer(old_peer))
1594 				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1595 			unix_state_unlock(old_peer);
1596 		}
1597 
1598 		sock_put(old_peer);
1599 	} else {
1600 		unix_peer(sk) = other;
1601 		unix_state_double_unlock(sk, other);
1602 	}
1603 
1604 	return 0;
1605 
1606 out_unlock:
1607 	unix_state_double_unlock(sk, other);
1608 	sock_put(other);
1609 out:
1610 	return err;
1611 }
1612 
1613 static long unix_wait_for_peer(struct sock *other, long timeo)
1614 {
1615 	struct unix_sock *u = unix_sk(other);
1616 	int sched;
1617 	DEFINE_WAIT(wait);
1618 
1619 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1620 
1621 	sched = !sock_flag(other, SOCK_DEAD) &&
1622 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1623 		unix_recvq_full_lockless(other);
1624 
1625 	unix_state_unlock(other);
1626 
1627 	if (sched)
1628 		timeo = schedule_timeout(timeo);
1629 
1630 	finish_wait(&u->peer_wait, &wait);
1631 	return timeo;
1632 }
1633 
1634 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1635 			       int addr_len, int flags)
1636 {
1637 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1638 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1639 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1640 	struct unix_peercred peercred = {};
1641 	struct net *net = sock_net(sk);
1642 	struct sk_buff *skb = NULL;
1643 	unsigned char state;
1644 	long timeo;
1645 	int err;
1646 
1647 	err = unix_validate_addr(sunaddr, addr_len);
1648 	if (err)
1649 		goto out;
1650 
1651 	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1652 	if (err)
1653 		goto out;
1654 
1655 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
1656 		err = unix_autobind(sk);
1657 		if (err)
1658 			goto out;
1659 	}
1660 
1661 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1662 
1663 	/* First of all allocate resources.
1664 	 * If we did it after the state is locked,
1665 	 * we would have to recheck everything again in any case.
1666 	 */
1667 
1668 	/* create new sock for complete connection */
1669 	newsk = unix_create1(net, NULL, 0, sock->type);
1670 	if (IS_ERR(newsk)) {
1671 		err = PTR_ERR(newsk);
1672 		goto out;
1673 	}
1674 
1675 	err = prepare_peercred(&peercred);
1676 	if (err)
1677 		goto out;
1678 
1679 	/* Allocate skb for sending to listening sock */
1680 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1681 	if (!skb) {
1682 		err = -ENOMEM;
1683 		goto out_free_sk;
1684 	}
1685 
1686 restart:
1687 	/*  Find listening sock. */
1688 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
1689 	if (IS_ERR(other)) {
1690 		err = PTR_ERR(other);
1691 		goto out_free_skb;
1692 	}
1693 
1694 	unix_state_lock(other);
1695 
1696 	/* Apparently VFS overslept socket death. Retry. */
1697 	if (sock_flag(other, SOCK_DEAD)) {
1698 		unix_state_unlock(other);
1699 		sock_put(other);
1700 		goto restart;
1701 	}
1702 
1703 	if (other->sk_state != TCP_LISTEN ||
1704 	    other->sk_shutdown & RCV_SHUTDOWN) {
1705 		err = -ECONNREFUSED;
1706 		goto out_unlock;
1707 	}
1708 
1709 	if (unix_recvq_full_lockless(other)) {
1710 		if (!timeo) {
1711 			err = -EAGAIN;
1712 			goto out_unlock;
1713 		}
1714 
1715 		timeo = unix_wait_for_peer(other, timeo);
1716 		sock_put(other);
1717 
1718 		err = sock_intr_errno(timeo);
1719 		if (signal_pending(current))
1720 			goto out_free_skb;
1721 
1722 		goto restart;
1723 	}
1724 
1725 	/* Self-connect and simultaneous connect are eliminated
1726 	 * by rejecting a TCP_LISTEN socket to avoid deadlock.
1727 	 */
1728 	state = READ_ONCE(sk->sk_state);
1729 	if (unlikely(state != TCP_CLOSE)) {
1730 		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1731 		goto out_unlock;
1732 	}
1733 
1734 	unix_state_lock(sk);
1735 
1736 	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1737 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1738 		unix_state_unlock(sk);
1739 		goto out_unlock;
1740 	}
1741 
1742 	err = security_unix_stream_connect(sk, other, newsk);
1743 	if (err) {
1744 		unix_state_unlock(sk);
1745 		goto out_unlock;
1746 	}
1747 
1748 	/* The way is open! Quickly set all the necessary fields... */
1749 
1750 	sock_hold(sk);
1751 	unix_peer(newsk) = sk;
1752 	newsk->sk_state = TCP_ESTABLISHED;
1753 	newsk->sk_type = sk->sk_type;
1754 	newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
1755 	init_peercred(newsk, &peercred);
1756 
1757 	newu = unix_sk(newsk);
1758 	newu->listener = other;
1759 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1760 	otheru = unix_sk(other);
1761 
1762 	/* copy address information from listening to new sock
1763 	 *
1764 	 * The contents of *(otheru->addr) and otheru->path
1765 	 * are seen fully set up here, since we have found
1766 	 * otheru in hash under its lock.  Insertion into the
1767 	 * hash chain we'd found it in had been done in an
1768 	 * earlier critical area protected by the chain's lock,
1769 	 * the same one where we'd set *(otheru->addr) contents,
1770 	 * as well as otheru->path and otheru->addr itself.
1771 	 *
1772 	 * Using smp_store_release() here to set newu->addr
1773 	 * is enough to make those stores, as well as stores
1774 	 * to newu->path visible to anyone who gets newu->addr
1775 	 * by smp_load_acquire().  IOW, the same guarantees
1776 	 * as for unix_sock instances bound in unix_bind() or
1777 	 * in unix_autobind().
1778 	 */
1779 	if (otheru->path.dentry) {
1780 		path_get(&otheru->path);
1781 		newu->path = otheru->path;
1782 	}
1783 	refcount_inc(&otheru->addr->refcnt);
1784 	smp_store_release(&newu->addr, otheru->addr);
1785 
1786 	/* Set credentials */
1787 	copy_peercred(sk, other);
1788 
1789 	sock->state	= SS_CONNECTED;
1790 	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1791 	sock_hold(newsk);
1792 
1793 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1794 	unix_peer(sk)	= newsk;
1795 
1796 	unix_state_unlock(sk);
1797 
1798 	/* take ten and send info to listening sock */
1799 	spin_lock(&other->sk_receive_queue.lock);
1800 	__skb_queue_tail(&other->sk_receive_queue, skb);
1801 	spin_unlock(&other->sk_receive_queue.lock);
1802 	unix_state_unlock(other);
1803 	other->sk_data_ready(other);
1804 	sock_put(other);
1805 	return 0;
1806 
1807 out_unlock:
1808 	unix_state_unlock(other);
1809 	sock_put(other);
1810 out_free_skb:
1811 	consume_skb(skb);
1812 out_free_sk:
1813 	unix_release_sock(newsk, 0);
1814 out:
1815 	drop_peercred(&peercred);
1816 	return err;
1817 }
1818 
1819 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1820 {
1821 	struct unix_peercred ska_peercred = {}, skb_peercred = {};
1822 	struct sock *ska = socka->sk, *skb = sockb->sk;
1823 	int err;
1824 
1825 	err = prepare_peercred(&ska_peercred);
1826 	if (err)
1827 		return err;
1828 
1829 	err = prepare_peercred(&skb_peercred);
1830 	if (err) {
1831 		drop_peercred(&ska_peercred);
1832 		return err;
1833 	}
1834 
1835 	/* Join our sockets back to back */
1836 	sock_hold(ska);
1837 	sock_hold(skb);
1838 	unix_peer(ska) = skb;
1839 	unix_peer(skb) = ska;
1840 	init_peercred(ska, &ska_peercred);
1841 	init_peercred(skb, &skb_peercred);
1842 
1843 	ska->sk_state = TCP_ESTABLISHED;
1844 	skb->sk_state = TCP_ESTABLISHED;
1845 	socka->state  = SS_CONNECTED;
1846 	sockb->state  = SS_CONNECTED;
1847 	return 0;
1848 }
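/* Userspace counterpart (illustrative): the two sockets joined above are
 * the pair returned by
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *
 * Both ends start out TCP_ESTABLISHED with each other as peer and with
 * the caller's credentials as peer credentials.
 */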
1849 
1850 static int unix_accept(struct socket *sock, struct socket *newsock,
1851 		       struct proto_accept_arg *arg)
1852 {
1853 	struct sock *sk = sock->sk;
1854 	struct sk_buff *skb;
1855 	struct sock *tsk;
1856 
1857 	arg->err = -EOPNOTSUPP;
1858 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1859 		goto out;
1860 
1861 	arg->err = -EINVAL;
1862 	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1863 		goto out;
1864 
1865 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1866 	 * so that no locks are necessary.
1867 	 */
1868 
1869 	skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1870 				&arg->err);
1871 	if (!skb) {
1872 		/* This means receive shutdown. */
1873 		if (arg->err == 0)
1874 			arg->err = -EINVAL;
1875 		goto out;
1876 	}
1877 
1878 	tsk = skb->sk;
1879 	skb_free_datagram(sk, skb);
1880 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1881 
1882 	if (tsk->sk_type == SOCK_STREAM)
1883 		set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
1884 
1885 	/* attach accepted sock to socket */
1886 	unix_state_lock(tsk);
1887 	unix_update_edges(unix_sk(tsk));
1888 	newsock->state = SS_CONNECTED;
1889 	sock_graft(tsk, newsock);
1890 	unix_state_unlock(tsk);
1891 	return 0;
1892 
1893 out:
1894 	return arg->err;
1895 }
1896 
1897 
1898 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1899 {
1900 	struct sock *sk = sock->sk;
1901 	struct unix_address *addr;
1902 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1903 	int err = 0;
1904 
1905 	if (peer) {
1906 		sk = unix_peer_get(sk);
1907 
1908 		err = -ENOTCONN;
1909 		if (!sk)
1910 			goto out;
1911 		err = 0;
1912 	} else {
1913 		sock_hold(sk);
1914 	}
1915 
1916 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1917 	if (!addr) {
1918 		sunaddr->sun_family = AF_UNIX;
1919 		sunaddr->sun_path[0] = 0;
1920 		err = offsetof(struct sockaddr_un, sun_path);
1921 	} else {
1922 		err = addr->len;
1923 		memcpy(sunaddr, addr->name, addr->len);
1924 
1925 		if (peer)
1926 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1927 					       CGROUP_UNIX_GETPEERNAME);
1928 		else
1929 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1930 					       CGROUP_UNIX_GETSOCKNAME);
1931 	}
1932 	sock_put(sk);
1933 out:
1934 	return err;
1935 }
1936 
1937 /* The "user->unix_inflight" variable is protected by the garbage
1938  * collection lock, and we just read it locklessly here. If you go
1939  * over the limit, there might be a tiny race in actually noticing
1940  * it across threads. Tough.
1941  */
1942 static inline bool too_many_unix_fds(struct task_struct *p)
1943 {
1944 	struct user_struct *user = current_user();
1945 
1946 	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1947 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1948 	return false;
1949 }
1950 
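/* Hand ownership of the SCM_RIGHTS file list from the scm cookie to the
 * skb being queued, unless the sending user already has too many
 * descriptors in flight (-ETOOMANYREFS).
 */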
1951 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1952 {
1953 	if (too_many_unix_fds(current))
1954 		return -ETOOMANYREFS;
1955 
1956 	UNIXCB(skb).fp = scm->fp;
1957 	scm->fp = NULL;
1958 
1959 	if (unix_prepare_fpl(UNIXCB(skb).fp))
1960 		return -ENOMEM;
1961 
1962 	return 0;
1963 }
1964 
1965 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1966 {
1967 	scm->fp = UNIXCB(skb).fp;
1968 	UNIXCB(skb).fp = NULL;
1969 
1970 	unix_destroy_fpl(scm->fp);
1971 }
1972 
1973 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1974 {
1975 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1976 }
1977 
1978 static void unix_destruct_scm(struct sk_buff *skb)
1979 {
1980 	struct scm_cookie scm;
1981 
1982 	memset(&scm, 0, sizeof(scm));
1983 	scm.pid = UNIXCB(skb).pid;
1984 	if (UNIXCB(skb).fp)
1985 		unix_detach_fds(&scm, skb);
1986 
1987 	/* Alas, it calls VFS */
1988 	/* So fscking what? fput() has been SMP-safe since last summer */
1989 	scm_destroy(&scm);
1990 	sock_wfree(skb);
1991 }
1992 
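/* Stash the ancillary data (pid, uid/gid, security label and, when
 * @send_fds is set, any passed file descriptors) from the scm cookie
 * into the skb control block, and set the destructor that releases it.
 */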
1993 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1994 {
1995 	int err = 0;
1996 
1997 	UNIXCB(skb).pid = get_pid(scm->pid);
1998 	UNIXCB(skb).uid = scm->creds.uid;
1999 	UNIXCB(skb).gid = scm->creds.gid;
2000 	UNIXCB(skb).fp = NULL;
2001 	unix_get_secdata(scm, skb);
2002 	if (scm->fp && send_fds)
2003 		err = unix_attach_fds(scm, skb);
2004 
2005 	skb->destructor = unix_destruct_scm;
2006 	return err;
2007 }
2008 
2009 static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
2010 {
2011 	scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2012 	unix_set_secdata(scm, skb);
2013 }
2014 
2015 /**
2016  * unix_maybe_add_creds() - Adds current task uid/gid and struct pid to skb if needed.
2017  * @skb: skb to attach creds to.
2018  * @sk: Sender sock.
2019  * @other: Receiver sock.
2020  *
2021  * Some apps rely on write() giving SCM_CREDENTIALS.
2022  * We include credentials if the source or destination socket
2023  * asserted SOCK_PASSCRED.
2024  *
2025  * Context: May sleep.
2026  * Return: On success zero, on error a negative error code is returned.
2027  */
2028 static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
2029 				const struct sock *other)
2030 {
2031 	if (UNIXCB(skb).pid)
2032 		return 0;
2033 
2034 	if (unix_may_passcred(sk) || unix_may_passcred(other) ||
2035 	    !other->sk_socket) {
2036 		struct pid *pid;
2037 		int err;
2038 
2039 		pid = task_tgid(current);
2040 		err = pidfs_register_pid(pid);
2041 		if (unlikely(err))
2042 			return err;
2043 
2044 		UNIXCB(skb).pid = get_pid(pid);
2045 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
2046 	}
2047 
2048 	return 0;
2049 }
2050 
2051 static bool unix_skb_scm_eq(struct sk_buff *skb,
2052 			    struct scm_cookie *scm)
2053 {
2054 	return UNIXCB(skb).pid == scm->pid &&
2055 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
2056 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
2057 	       unix_secdata_eq(scm, skb);
2058 }
2059 
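/* Track how many passed file descriptors are queued on a receiving
 * socket and keep the garbage collector's edge graph in sync as skbs
 * are queued (below) and dequeued.
 */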
2060 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
2061 {
2062 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2063 	struct unix_sock *u = unix_sk(sk);
2064 
2065 	if (unlikely(fp && fp->count)) {
2066 		atomic_add(fp->count, &u->scm_stat.nr_fds);
2067 		unix_add_edges(fp, u);
2068 	}
2069 }
2070 
2071 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
2072 {
2073 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2074 	struct unix_sock *u = unix_sk(sk);
2075 
2076 	if (unlikely(fp && fp->count)) {
2077 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
2078 		unix_del_edges(fp);
2079 	}
2080 }
2081 
2082 /*
2083  *	Send AF_UNIX data.
2084  */
2085 
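/* unix_dgram_sendmsg() covers SOCK_DGRAM and, via unix_seqpacket_sendmsg(),
 * SOCK_SEQPACKET: autobind if needed, find the peer by address or use the
 * connected one, and when the receiver's backlog is full either block in
 * unix_wait_for_peer() or fail with -EAGAIN, using the peer-wake machinery
 * to avoid missed wakeups.
 */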
2086 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
2087 			      size_t len)
2088 {
2089 	struct sock *sk = sock->sk, *other = NULL;
2090 	struct unix_sock *u = unix_sk(sk);
2091 	struct scm_cookie scm;
2092 	struct sk_buff *skb;
2093 	int data_len = 0;
2094 	int sk_locked;
2095 	long timeo;
2096 	int err;
2097 
2098 	err = scm_send(sock, msg, &scm, false);
2099 	if (err < 0)
2100 		return err;
2101 
2102 	wait_for_unix_gc(scm.fp);
2103 
2104 	if (msg->msg_flags & MSG_OOB) {
2105 		err = -EOPNOTSUPP;
2106 		goto out;
2107 	}
2108 
2109 	if (msg->msg_namelen) {
2110 		err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
2111 		if (err)
2112 			goto out;
2113 
2114 		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
2115 							    msg->msg_name,
2116 							    &msg->msg_namelen,
2117 							    NULL);
2118 		if (err)
2119 			goto out;
2120 	}
2121 
2122 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
2123 		err = unix_autobind(sk);
2124 		if (err)
2125 			goto out;
2126 	}
2127 
2128 	if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
2129 		err = -EMSGSIZE;
2130 		goto out;
2131 	}
2132 
2133 	if (len > SKB_MAX_ALLOC) {
2134 		data_len = min_t(size_t,
2135 				 len - SKB_MAX_ALLOC,
2136 				 MAX_SKB_FRAGS * PAGE_SIZE);
2137 		data_len = PAGE_ALIGN(data_len);
2138 
2139 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
2140 	}
2141 
2142 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
2143 				   msg->msg_flags & MSG_DONTWAIT, &err,
2144 				   PAGE_ALLOC_COSTLY_ORDER);
2145 	if (!skb)
2146 		goto out;
2147 
2148 	err = unix_scm_to_skb(&scm, skb, true);
2149 	if (err < 0)
2150 		goto out_free;
2151 
2152 	skb_put(skb, len - data_len);
2153 	skb->data_len = data_len;
2154 	skb->len = len;
2155 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
2156 	if (err)
2157 		goto out_free;
2158 
2159 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2160 
2161 	if (msg->msg_namelen) {
2162 lookup:
2163 		other = unix_find_other(sock_net(sk), msg->msg_name,
2164 					msg->msg_namelen, sk->sk_type, 0);
2165 		if (IS_ERR(other)) {
2166 			err = PTR_ERR(other);
2167 			goto out_free;
2168 		}
2169 	} else {
2170 		other = unix_peer_get(sk);
2171 		if (!other) {
2172 			err = -ENOTCONN;
2173 			goto out_free;
2174 		}
2175 	}
2176 
2177 	if (sk_filter(other, skb) < 0) {
2178 		/* Toss the packet but do not return any error to the sender */
2179 		err = len;
2180 		goto out_sock_put;
2181 	}
2182 
2183 	err = unix_maybe_add_creds(skb, sk, other);
2184 	if (err)
2185 		goto out_sock_put;
2186 
2187 restart:
2188 	sk_locked = 0;
2189 	unix_state_lock(other);
2190 restart_locked:
2191 
2192 	if (!unix_may_send(sk, other)) {
2193 		err = -EPERM;
2194 		goto out_unlock;
2195 	}
2196 
2197 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
2198 		/* Check with 1003.1g - what should a datagram error return? */
2199 
2200 		unix_state_unlock(other);
2201 
2202 		if (sk->sk_type == SOCK_SEQPACKET) {
2203 			/* We are here only when racing with unix_release_sock(),
2204 			 * which is clearing @other. Never change the state to
2205 			 * TCP_CLOSE, unlike the SOCK_DGRAM path below does.
2206 			 */
2207 			err = -EPIPE;
2208 			goto out_sock_put;
2209 		}
2210 
2211 		if (!sk_locked)
2212 			unix_state_lock(sk);
2213 
2214 		if (unix_peer(sk) == other) {
2215 			unix_peer(sk) = NULL;
2216 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2217 
2218 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2219 			unix_state_unlock(sk);
2220 
2221 			unix_dgram_disconnected(sk, other);
2222 			sock_put(other);
2223 			err = -ECONNREFUSED;
2224 			goto out_sock_put;
2225 		}
2226 
2227 		unix_state_unlock(sk);
2228 
2229 		if (!msg->msg_namelen) {
2230 			err = -ECONNRESET;
2231 			goto out_sock_put;
2232 		}
2233 
2234 		sock_put(other);
2235 		goto lookup;
2236 	}
2237 
2238 	if (other->sk_shutdown & RCV_SHUTDOWN) {
2239 		err = -EPIPE;
2240 		goto out_unlock;
2241 	}
2242 
2243 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2244 		err = -EPERM;
2245 		goto out_unlock;
2246 	}
2247 
2248 	if (sk->sk_type != SOCK_SEQPACKET) {
2249 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2250 		if (err)
2251 			goto out_unlock;
2252 	}
2253 
2254 	/* other == sk && unix_peer(other) != sk if either
2255 	 * - unix_peer(sk) == NULL and the destination address is bound to sk, or
2256 	 * - unix_peer(sk) == sk at lookup time but it got disconnected before the lock
2257 	 */
2258 	if (other != sk &&
2259 	    unlikely(unix_peer(other) != sk &&
2260 	    unix_recvq_full_lockless(other))) {
2261 		if (timeo) {
2262 			timeo = unix_wait_for_peer(other, timeo);
2263 
2264 			err = sock_intr_errno(timeo);
2265 			if (signal_pending(current))
2266 				goto out_sock_put;
2267 
2268 			goto restart;
2269 		}
2270 
2271 		if (!sk_locked) {
2272 			unix_state_unlock(other);
2273 			unix_state_double_lock(sk, other);
2274 		}
2275 
2276 		if (unix_peer(sk) != other ||
2277 		    unix_dgram_peer_wake_me(sk, other)) {
2278 			err = -EAGAIN;
2279 			sk_locked = 1;
2280 			goto out_unlock;
2281 		}
2282 
2283 		if (!sk_locked) {
2284 			sk_locked = 1;
2285 			goto restart_locked;
2286 		}
2287 	}
2288 
2289 	if (unlikely(sk_locked))
2290 		unix_state_unlock(sk);
2291 
2292 	if (sock_flag(other, SOCK_RCVTSTAMP))
2293 		__net_timestamp(skb);
2294 
2295 	scm_stat_add(other, skb);
2296 	skb_queue_tail(&other->sk_receive_queue, skb);
2297 	unix_state_unlock(other);
2298 	other->sk_data_ready(other);
2299 	sock_put(other);
2300 	scm_destroy(&scm);
2301 	return len;
2302 
2303 out_unlock:
2304 	if (sk_locked)
2305 		unix_state_unlock(sk);
2306 	unix_state_unlock(other);
2307 out_sock_put:
2308 	sock_put(other);
2309 out_free:
2310 	consume_skb(skb);
2311 out:
2312 	scm_destroy(&scm);
2313 	return err;
2314 }
2315 
2316 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2317  * bytes, with a minimum of a full page.
2318  */
2319 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2320 
2321 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
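/* Queue a single out-of-band byte: a one-byte skb is appended to the
 * receiver's queue, recorded as its oob_skb mark and announced with
 * sk_send_sigurg(), roughly mirroring TCP urgent data.
 */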
2322 static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
2323 		     struct scm_cookie *scm, bool fds_sent)
2324 {
2325 	struct unix_sock *ousk = unix_sk(other);
2326 	struct sk_buff *skb;
2327 	int err;
2328 
2329 	skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2330 
2331 	if (!skb)
2332 		return err;
2333 
2334 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2335 	if (err < 0)
2336 		goto out;
2337 
2338 	err = unix_maybe_add_creds(skb, sk, other);
2339 	if (err)
2340 		goto out;
2341 
2342 	skb_put(skb, 1);
2343 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2344 
2345 	if (err)
2346 		goto out;
2347 
2348 	unix_state_lock(other);
2349 
2350 	if (sock_flag(other, SOCK_DEAD) ||
2351 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2352 		err = -EPIPE;
2353 		goto out_unlock;
2354 	}
2355 
2356 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2357 		err = -EPERM;
2358 		goto out_unlock;
2359 	}
2360 
2361 	scm_stat_add(other, skb);
2362 
2363 	spin_lock(&other->sk_receive_queue.lock);
2364 	WRITE_ONCE(ousk->oob_skb, skb);
2365 	WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
2366 	__skb_queue_tail(&other->sk_receive_queue, skb);
2367 	spin_unlock(&other->sk_receive_queue.lock);
2368 
2369 	sk_send_sigurg(other);
2370 	unix_state_unlock(other);
2371 	other->sk_data_ready(other);
2372 
2373 	return 0;
2374 out_unlock:
2375 	unix_state_unlock(other);
2376 out:
2377 	consume_skb(skb);
2378 	return err;
2379 }
2380 #endif
2381 
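/* SOCK_STREAM send path: the payload is split into paged skbs capped at
 * half the send buffer, passed file descriptors ride only in the first
 * skb, and with MSG_OOB the final byte is queued separately through
 * queue_oob() above.
 */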
2382 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2383 			       size_t len)
2384 {
2385 	struct sock *sk = sock->sk;
2386 	struct sk_buff *skb = NULL;
2387 	struct sock *other = NULL;
2388 	struct unix_sock *otheru;
2389 	struct scm_cookie scm;
2390 	bool fds_sent = false;
2391 	int err, sent = 0;
2392 
2393 	err = scm_send(sock, msg, &scm, false);
2394 	if (err < 0)
2395 		return err;
2396 
2397 	wait_for_unix_gc(scm.fp);
2398 
2399 	if (msg->msg_flags & MSG_OOB) {
2400 		err = -EOPNOTSUPP;
2401 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2402 		if (len)
2403 			len--;
2404 		else
2405 #endif
2406 			goto out_err;
2407 	}
2408 
2409 	if (msg->msg_namelen) {
2410 		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2411 		goto out_err;
2412 	}
2413 
2414 	other = unix_peer(sk);
2415 	if (!other) {
2416 		err = -ENOTCONN;
2417 		goto out_err;
2418 	}
2419 
2420 	otheru = unix_sk(other);
2421 
2422 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2423 		goto out_pipe;
2424 
2425 	while (sent < len) {
2426 		int size = len - sent;
2427 		int data_len;
2428 
2429 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2430 			skb = sock_alloc_send_pskb(sk, 0, 0,
2431 						   msg->msg_flags & MSG_DONTWAIT,
2432 						   &err, 0);
2433 		} else {
2434 			/* Keep two messages in the pipe so it schedules better */
2435 			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2436 
2437 			/* allow fallback to order-0 allocations */
2438 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2439 
2440 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2441 
2442 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2443 
2444 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2445 						   msg->msg_flags & MSG_DONTWAIT, &err,
2446 						   get_order(UNIX_SKB_FRAGS_SZ));
2447 		}
2448 		if (!skb)
2449 			goto out_err;
2450 
2451 		/* Only send the fds in the first buffer */
2452 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2453 		if (err < 0)
2454 			goto out_free;
2455 
2456 		fds_sent = true;
2457 
2458 		err = unix_maybe_add_creds(skb, sk, other);
2459 		if (err)
2460 			goto out_free;
2461 
2462 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2463 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2464 			err = skb_splice_from_iter(skb, &msg->msg_iter, size);
2465 			if (err < 0)
2466 				goto out_free;
2467 
2468 			size = err;
2469 			refcount_add(size, &sk->sk_wmem_alloc);
2470 		} else {
2471 			skb_put(skb, size - data_len);
2472 			skb->data_len = data_len;
2473 			skb->len = size;
2474 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2475 			if (err)
2476 				goto out_free;
2477 		}
2478 
2479 		unix_state_lock(other);
2480 
2481 		if (sock_flag(other, SOCK_DEAD) ||
2482 		    (other->sk_shutdown & RCV_SHUTDOWN))
2483 			goto out_pipe_unlock;
2484 
2485 		if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2486 			unix_state_unlock(other);
2487 			err = -EPERM;
2488 			goto out_free;
2489 		}
2490 
2491 		scm_stat_add(other, skb);
2492 
2493 		spin_lock(&other->sk_receive_queue.lock);
2494 		WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
2495 		__skb_queue_tail(&other->sk_receive_queue, skb);
2496 		spin_unlock(&other->sk_receive_queue.lock);
2497 
2498 		unix_state_unlock(other);
2499 		other->sk_data_ready(other);
2500 		sent += size;
2501 	}
2502 
2503 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2504 	if (msg->msg_flags & MSG_OOB) {
2505 		err = queue_oob(sk, msg, other, &scm, fds_sent);
2506 		if (err)
2507 			goto out_err;
2508 		sent++;
2509 	}
2510 #endif
2511 
2512 	scm_destroy(&scm);
2513 
2514 	return sent;
2515 
2516 out_pipe_unlock:
2517 	unix_state_unlock(other);
2518 out_pipe:
2519 	if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
2520 		send_sig(SIGPIPE, current, 0);
2521 	err = -EPIPE;
2522 out_free:
2523 	consume_skb(skb);
2524 out_err:
2525 	scm_destroy(&scm);
2526 	return sent ? : err;
2527 }
2528 
2529 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2530 				  size_t len)
2531 {
2532 	int err;
2533 	struct sock *sk = sock->sk;
2534 
2535 	err = sock_error(sk);
2536 	if (err)
2537 		return err;
2538 
2539 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2540 		return -ENOTCONN;
2541 
2542 	if (msg->msg_namelen)
2543 		msg->msg_namelen = 0;
2544 
2545 	return unix_dgram_sendmsg(sock, msg, len);
2546 }
2547 
2548 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2549 				  size_t size, int flags)
2550 {
2551 	struct sock *sk = sock->sk;
2552 
2553 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2554 		return -ENOTCONN;
2555 
2556 	return unix_dgram_recvmsg(sock, msg, size, flags);
2557 }
2558 
2559 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2560 {
2561 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2562 
2563 	if (addr) {
2564 		msg->msg_namelen = addr->len;
2565 		memcpy(msg->msg_name, addr->name, addr->len);
2566 	}
2567 }
2568 
2569 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2570 			 int flags)
2571 {
2572 	struct scm_cookie scm;
2573 	struct socket *sock = sk->sk_socket;
2574 	struct unix_sock *u = unix_sk(sk);
2575 	struct sk_buff *skb, *last;
2576 	long timeo;
2577 	int skip;
2578 	int err;
2579 
2580 	err = -EOPNOTSUPP;
2581 	if (flags&MSG_OOB)
2582 		goto out;
2583 
2584 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2585 
2586 	do {
2587 		mutex_lock(&u->iolock);
2588 
2589 		skip = sk_peek_offset(sk, flags);
2590 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2591 					      &skip, &err, &last);
2592 		if (skb) {
2593 			if (!(flags & MSG_PEEK))
2594 				scm_stat_del(sk, skb);
2595 			break;
2596 		}
2597 
2598 		mutex_unlock(&u->iolock);
2599 
2600 		if (err != -EAGAIN)
2601 			break;
2602 	} while (timeo &&
2603 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2604 					      &err, &timeo, last));
2605 
2606 	if (!skb) { /* implies iolock unlocked */
2607 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2608 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2609 		    (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
2610 			err = 0;
2611 		goto out;
2612 	}
2613 
2614 	if (wq_has_sleeper(&u->peer_wait))
2615 		wake_up_interruptible_sync_poll(&u->peer_wait,
2616 						EPOLLOUT | EPOLLWRNORM |
2617 						EPOLLWRBAND);
2618 
2619 	if (msg->msg_name) {
2620 		unix_copy_addr(msg, skb->sk);
2621 
2622 		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2623 						      msg->msg_name,
2624 						      &msg->msg_namelen);
2625 	}
2626 
2627 	if (size > skb->len - skip)
2628 		size = skb->len - skip;
2629 	else if (size < skb->len - skip)
2630 		msg->msg_flags |= MSG_TRUNC;
2631 
2632 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2633 	if (err)
2634 		goto out_free;
2635 
2636 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2637 		__sock_recv_timestamp(msg, sk, skb);
2638 
2639 	memset(&scm, 0, sizeof(scm));
2640 
2641 	unix_skb_to_scm(skb, &scm);
2642 
2643 	if (!(flags & MSG_PEEK)) {
2644 		if (UNIXCB(skb).fp)
2645 			unix_detach_fds(&scm, skb);
2646 
2647 		sk_peek_offset_bwd(sk, skb->len);
2648 	} else {
2649 		/* It is questionable: on PEEK we could:
2650 		   - not return fds - good, but too simple 8)
2651 		   - return fds, and not return them again on read (old strategy,
2652 		     apparently wrong)
2653 		   - clone fds (I chose it for now, it is the most universal
2654 		     solution)
2655 
2656 		   POSIX 1003.1g does not actually define this clearly
2657 		   at all. POSIX 1003.1g doesn't define a lot of things
2658 		   clearly however!
2659 
2660 		*/
2661 
2662 		sk_peek_offset_fwd(sk, size);
2663 
2664 		if (UNIXCB(skb).fp)
2665 			unix_peek_fds(&scm, skb);
2666 	}
2667 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2668 
2669 	scm_recv_unix(sock, msg, &scm, flags);
2670 
2671 out_free:
2672 	skb_free_datagram(sk, skb);
2673 	mutex_unlock(&u->iolock);
2674 out:
2675 	return err;
2676 }
2677 
2678 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2679 			      int flags)
2680 {
2681 	struct sock *sk = sock->sk;
2682 
2683 #ifdef CONFIG_BPF_SYSCALL
2684 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2685 
2686 	if (prot != &unix_dgram_proto)
2687 		return prot->recvmsg(sk, msg, size, flags, NULL);
2688 #endif
2689 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2690 }
2691 
2692 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2693 {
2694 	struct unix_sock *u = unix_sk(sk);
2695 	struct sk_buff *skb;
2696 	int err;
2697 
2698 	mutex_lock(&u->iolock);
2699 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2700 	mutex_unlock(&u->iolock);
2701 	if (!skb)
2702 		return err;
2703 
2704 	return recv_actor(sk, skb);
2705 }
2706 
2707 /*
2708  *	Sleep until more data has arrived. But check for races.
2709  */
2710 static long unix_stream_data_wait(struct sock *sk, long timeo,
2711 				  struct sk_buff *last, unsigned int last_len,
2712 				  bool freezable)
2713 {
2714 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2715 	struct sk_buff *tail;
2716 	DEFINE_WAIT(wait);
2717 
2718 	unix_state_lock(sk);
2719 
2720 	for (;;) {
2721 		prepare_to_wait(sk_sleep(sk), &wait, state);
2722 
2723 		tail = skb_peek_tail(&sk->sk_receive_queue);
2724 		if (tail != last ||
2725 		    (tail && tail->len != last_len) ||
2726 		    sk->sk_err ||
2727 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2728 		    signal_pending(current) ||
2729 		    !timeo)
2730 			break;
2731 
2732 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2733 		unix_state_unlock(sk);
2734 		timeo = schedule_timeout(timeo);
2735 		unix_state_lock(sk);
2736 
2737 		if (sock_flag(sk, SOCK_DEAD))
2738 			break;
2739 
2740 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2741 	}
2742 
2743 	finish_wait(sk_sleep(sk), &wait);
2744 	unix_state_unlock(sk);
2745 	return timeo;
2746 }
2747 
2748 struct unix_stream_read_state {
2749 	int (*recv_actor)(struct sk_buff *, int, int,
2750 			  struct unix_stream_read_state *);
2751 	struct socket *socket;
2752 	struct msghdr *msg;
2753 	struct pipe_inode_info *pipe;
2754 	size_t size;
2755 	int flags;
2756 	unsigned int splice_flags;
2757 };
2758 
2759 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2760 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2761 {
2762 	struct sk_buff *oob_skb, *read_skb = NULL;
2763 	struct socket *sock = state->socket;
2764 	struct sock *sk = sock->sk;
2765 	struct unix_sock *u = unix_sk(sk);
2766 	int chunk = 1;
2767 
2768 	mutex_lock(&u->iolock);
2769 	unix_state_lock(sk);
2770 	spin_lock(&sk->sk_receive_queue.lock);
2771 
2772 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2773 		spin_unlock(&sk->sk_receive_queue.lock);
2774 		unix_state_unlock(sk);
2775 		mutex_unlock(&u->iolock);
2776 		return -EINVAL;
2777 	}
2778 
2779 	oob_skb = u->oob_skb;
2780 
2781 	if (!(state->flags & MSG_PEEK)) {
2782 		WRITE_ONCE(u->oob_skb, NULL);
2783 		WRITE_ONCE(u->inq_len, u->inq_len - 1);
2784 
2785 		if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
2786 		    !unix_skb_len(oob_skb->prev)) {
2787 			read_skb = oob_skb->prev;
2788 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2789 		}
2790 	}
2791 
2792 	spin_unlock(&sk->sk_receive_queue.lock);
2793 	unix_state_unlock(sk);
2794 
2795 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2796 
2797 	if (!(state->flags & MSG_PEEK))
2798 		UNIXCB(oob_skb).consumed += 1;
2799 
2800 	mutex_unlock(&u->iolock);
2801 
2802 	consume_skb(read_skb);
2803 
2804 	if (chunk < 0)
2805 		return -EFAULT;
2806 
2807 	state->msg->msg_flags |= MSG_OOB;
2808 	return 1;
2809 }
2810 
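/* Decide how the stream reader treats the OOB mark: depending on
 * MSG_PEEK, SOCK_URGINLINE and whether data has already been copied,
 * the OOB skb is skipped, dropped or read inline, and a read in
 * progress stops at the mark so normal data never crosses it.
 */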
2811 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2812 				  int flags, int copied)
2813 {
2814 	struct sk_buff *read_skb = NULL, *unread_skb = NULL;
2815 	struct unix_sock *u = unix_sk(sk);
2816 
2817 	if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
2818 		return skb;
2819 
2820 	spin_lock(&sk->sk_receive_queue.lock);
2821 
2822 	if (!unix_skb_len(skb)) {
2823 		if (copied && (!u->oob_skb || skb == u->oob_skb)) {
2824 			skb = NULL;
2825 		} else if (flags & MSG_PEEK) {
2826 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2827 		} else {
2828 			read_skb = skb;
2829 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2830 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2831 		}
2832 
2833 		if (!skb)
2834 			goto unlock;
2835 	}
2836 
2837 	if (skb != u->oob_skb)
2838 		goto unlock;
2839 
2840 	if (copied) {
2841 		skb = NULL;
2842 	} else if (!(flags & MSG_PEEK)) {
2843 		WRITE_ONCE(u->oob_skb, NULL);
2844 
2845 		if (!sock_flag(sk, SOCK_URGINLINE)) {
2846 			__skb_unlink(skb, &sk->sk_receive_queue);
2847 			unread_skb = skb;
2848 			skb = skb_peek(&sk->sk_receive_queue);
2849 		}
2850 	} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2851 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
2852 	}
2853 
2854 unlock:
2855 	spin_unlock(&sk->sk_receive_queue.lock);
2856 
2857 	consume_skb(read_skb);
2858 	kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2859 
2860 	return skb;
2861 }
2862 #endif
2863 
2864 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2865 {
2866 	struct sk_buff_head *queue = &sk->sk_receive_queue;
2867 	struct unix_sock *u = unix_sk(sk);
2868 	struct sk_buff *skb;
2869 	int err;
2870 
2871 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2872 		return -ENOTCONN;
2873 
2874 	err = sock_error(sk);
2875 	if (err)
2876 		return err;
2877 
2878 	mutex_lock(&u->iolock);
2879 	spin_lock(&queue->lock);
2880 
2881 	skb = __skb_dequeue(queue);
2882 	if (!skb) {
2883 		spin_unlock(&queue->lock);
2884 		mutex_unlock(&u->iolock);
2885 		return -EAGAIN;
2886 	}
2887 
2888 	WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
2889 
2890 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2891 	if (skb == u->oob_skb) {
2892 		WRITE_ONCE(u->oob_skb, NULL);
2893 		spin_unlock(&queue->lock);
2894 		mutex_unlock(&u->iolock);
2895 
2896 		kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2897 		return -EAGAIN;
2898 	}
2899 #endif
2900 
2901 	spin_unlock(&queue->lock);
2902 	mutex_unlock(&u->iolock);
2903 
2904 	return recv_actor(sk, skb);
2905 }
2906 
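/* Common reader behind recvmsg(2) and splice(2) on stream sockets: data
 * is pulled from the receive queue through state->recv_actor, skbs
 * carrying different senders' credentials are never glued into a single
 * return, and MSG_PEEK walks the queue without consuming it.
 */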
2907 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2908 				    bool freezable)
2909 {
2910 	int noblock = state->flags & MSG_DONTWAIT;
2911 	struct socket *sock = state->socket;
2912 	struct msghdr *msg = state->msg;
2913 	struct sock *sk = sock->sk;
2914 	size_t size = state->size;
2915 	int flags = state->flags;
2916 	bool check_creds = false;
2917 	struct scm_cookie scm;
2918 	unsigned int last_len;
2919 	struct unix_sock *u;
2920 	int copied = 0;
2921 	int err = 0;
2922 	long timeo;
2923 	int target;
2924 	int skip;
2925 
2926 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2927 		err = -EINVAL;
2928 		goto out;
2929 	}
2930 
2931 	if (unlikely(flags & MSG_OOB)) {
2932 		err = -EOPNOTSUPP;
2933 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2934 		err = unix_stream_recv_urg(state);
2935 #endif
2936 		goto out;
2937 	}
2938 
2939 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2940 	timeo = sock_rcvtimeo(sk, noblock);
2941 
2942 	memset(&scm, 0, sizeof(scm));
2943 
2944 	u = unix_sk(sk);
2945 
2946 	/* Lock the socket to prevent queue disordering
2947 	 * while we sleep in memcpy_to_msg().
2948 	 */
2949 	mutex_lock(&u->iolock);
2950 
2951 	skip = max(sk_peek_offset(sk, flags), 0);
2952 
2953 	do {
2954 		struct sk_buff *skb, *last;
2955 		int chunk;
2956 
2957 redo:
2958 		unix_state_lock(sk);
2959 		if (sock_flag(sk, SOCK_DEAD)) {
2960 			err = -ECONNRESET;
2961 			goto unlock;
2962 		}
2963 		last = skb = skb_peek(&sk->sk_receive_queue);
2964 		last_len = last ? last->len : 0;
2965 
2966 again:
2967 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2968 		if (skb) {
2969 			skb = manage_oob(skb, sk, flags, copied);
2970 			if (!skb && copied) {
2971 				unix_state_unlock(sk);
2972 				break;
2973 			}
2974 		}
2975 #endif
2976 		if (skb == NULL) {
2977 			if (copied >= target)
2978 				goto unlock;
2979 
2980 			/*
2981 			 *	POSIX 1003.1g mandates this order.
2982 			 */
2983 
2984 			err = sock_error(sk);
2985 			if (err)
2986 				goto unlock;
2987 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2988 				goto unlock;
2989 
2990 			unix_state_unlock(sk);
2991 			if (!timeo) {
2992 				err = -EAGAIN;
2993 				break;
2994 			}
2995 
2996 			mutex_unlock(&u->iolock);
2997 
2998 			timeo = unix_stream_data_wait(sk, timeo, last,
2999 						      last_len, freezable);
3000 
3001 			if (signal_pending(current)) {
3002 				err = sock_intr_errno(timeo);
3003 				scm_destroy(&scm);
3004 				goto out;
3005 			}
3006 
3007 			mutex_lock(&u->iolock);
3008 			goto redo;
3009 unlock:
3010 			unix_state_unlock(sk);
3011 			break;
3012 		}
3013 
3014 		while (skip >= unix_skb_len(skb)) {
3015 			skip -= unix_skb_len(skb);
3016 			last = skb;
3017 			last_len = skb->len;
3018 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3019 			if (!skb)
3020 				goto again;
3021 		}
3022 
3023 		unix_state_unlock(sk);
3024 
3025 		if (check_creds) {
3026 			/* Never glue messages from different writers */
3027 			if (!unix_skb_scm_eq(skb, &scm))
3028 				break;
3029 		} else if (unix_may_passcred(sk)) {
3030 			/* Copy credentials */
3031 			unix_skb_to_scm(skb, &scm);
3032 			check_creds = true;
3033 		}
3034 
3035 		/* Copy address just once */
3036 		if (msg && msg->msg_name) {
3037 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
3038 
3039 			unix_copy_addr(msg, skb->sk);
3040 			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
3041 							      &msg->msg_namelen);
3042 
3043 			sunaddr = NULL;
3044 		}
3045 
3046 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
3047 		chunk = state->recv_actor(skb, skip, chunk, state);
3048 		if (chunk < 0) {
3049 			if (copied == 0)
3050 				copied = -EFAULT;
3051 			break;
3052 		}
3053 		copied += chunk;
3054 		size -= chunk;
3055 
3056 		/* Mark read part of skb as used */
3057 		if (!(flags & MSG_PEEK)) {
3058 			UNIXCB(skb).consumed += chunk;
3059 
3060 			sk_peek_offset_bwd(sk, chunk);
3061 
3062 			if (UNIXCB(skb).fp) {
3063 				scm_stat_del(sk, skb);
3064 				unix_detach_fds(&scm, skb);
3065 			}
3066 
3067 			if (unix_skb_len(skb))
3068 				break;
3069 
3070 			spin_lock(&sk->sk_receive_queue.lock);
3071 			WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
3072 			__skb_unlink(skb, &sk->sk_receive_queue);
3073 			spin_unlock(&sk->sk_receive_queue.lock);
3074 
3075 			consume_skb(skb);
3076 
3077 			if (scm.fp)
3078 				break;
3079 		} else {
3080 			/* It is questionable; see the note in unix_dgram_recvmsg().
3081 			 */
3082 			if (UNIXCB(skb).fp)
3083 				unix_peek_fds(&scm, skb);
3084 
3085 			sk_peek_offset_fwd(sk, chunk);
3086 
3087 			if (UNIXCB(skb).fp)
3088 				break;
3089 
3090 			skip = 0;
3091 			last = skb;
3092 			last_len = skb->len;
3093 			unix_state_lock(sk);
3094 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3095 			if (skb)
3096 				goto again;
3097 			unix_state_unlock(sk);
3098 			break;
3099 		}
3100 	} while (size);
3101 
3102 	mutex_unlock(&u->iolock);
3103 	if (msg) {
3104 		scm_recv_unix(sock, msg, &scm, flags);
3105 
3106 		if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) {
3107 			msg->msg_inq = READ_ONCE(u->inq_len);
3108 			put_cmsg(msg, SOL_SOCKET, SCM_INQ,
3109 				 sizeof(msg->msg_inq), &msg->msg_inq);
3110 		}
3111 	} else {
3112 		scm_destroy(&scm);
3113 	}
3114 out:
3115 	return copied ? : err;
3116 }
3117 
3118 static int unix_stream_read_actor(struct sk_buff *skb,
3119 				  int skip, int chunk,
3120 				  struct unix_stream_read_state *state)
3121 {
3122 	int ret;
3123 
3124 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
3125 				    state->msg, chunk);
3126 	return ret ?: chunk;
3127 }
3128 
3129 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
3130 			  size_t size, int flags)
3131 {
3132 	struct unix_stream_read_state state = {
3133 		.recv_actor = unix_stream_read_actor,
3134 		.socket = sk->sk_socket,
3135 		.msg = msg,
3136 		.size = size,
3137 		.flags = flags
3138 	};
3139 
3140 	return unix_stream_read_generic(&state, true);
3141 }
3142 
3143 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
3144 			       size_t size, int flags)
3145 {
3146 	struct unix_stream_read_state state = {
3147 		.recv_actor = unix_stream_read_actor,
3148 		.socket = sock,
3149 		.msg = msg,
3150 		.size = size,
3151 		.flags = flags
3152 	};
3153 
3154 #ifdef CONFIG_BPF_SYSCALL
3155 	struct sock *sk = sock->sk;
3156 	const struct proto *prot = READ_ONCE(sk->sk_prot);
3157 
3158 	if (prot != &unix_stream_proto)
3159 		return prot->recvmsg(sk, msg, size, flags, NULL);
3160 #endif
3161 	return unix_stream_read_generic(&state, true);
3162 }
3163 
3164 static int unix_stream_splice_actor(struct sk_buff *skb,
3165 				    int skip, int chunk,
3166 				    struct unix_stream_read_state *state)
3167 {
3168 	return skb_splice_bits(skb, state->socket->sk,
3169 			       UNIXCB(skb).consumed + skip,
3170 			       state->pipe, chunk, state->splice_flags);
3171 }
3172 
3173 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
3174 				       struct pipe_inode_info *pipe,
3175 				       size_t size, unsigned int flags)
3176 {
3177 	struct unix_stream_read_state state = {
3178 		.recv_actor = unix_stream_splice_actor,
3179 		.socket = sock,
3180 		.pipe = pipe,
3181 		.size = size,
3182 		.splice_flags = flags,
3183 	};
3184 
3185 	if (unlikely(*ppos))
3186 		return -ESPIPE;
3187 
3188 	if (sock->file->f_flags & O_NONBLOCK ||
3189 	    flags & SPLICE_F_NONBLOCK)
3190 		state.flags = MSG_DONTWAIT;
3191 
3192 	return unix_stream_read_generic(&state, false);
3193 }
3194 
3195 static int unix_shutdown(struct socket *sock, int mode)
3196 {
3197 	struct sock *sk = sock->sk;
3198 	struct sock *other;
3199 
3200 	if (mode < SHUT_RD || mode > SHUT_RDWR)
3201 		return -EINVAL;
3202 	/* This maps:
3203 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
3204 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
3205 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3206 	 */
3207 	++mode;
3208 
3209 	unix_state_lock(sk);
3210 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
3211 	other = unix_peer(sk);
3212 	if (other)
3213 		sock_hold(other);
3214 	unix_state_unlock(sk);
3215 	sk->sk_state_change(sk);
3216 
3217 	if (other &&
3218 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3219 
3220 		int peer_mode = 0;
3221 		const struct proto *prot = READ_ONCE(other->sk_prot);
3222 
3223 		if (prot->unhash)
3224 			prot->unhash(other);
3225 		if (mode&RCV_SHUTDOWN)
3226 			peer_mode |= SEND_SHUTDOWN;
3227 		if (mode&SEND_SHUTDOWN)
3228 			peer_mode |= RCV_SHUTDOWN;
3229 		unix_state_lock(other);
3230 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
3231 		unix_state_unlock(other);
3232 		other->sk_state_change(other);
3233 		if (peer_mode == SHUTDOWN_MASK)
3234 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3235 		else if (peer_mode & RCV_SHUTDOWN)
3236 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3237 	}
3238 	if (other)
3239 		sock_put(other);
3240 
3241 	return 0;
3242 }
3243 
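/* Bytes available for reading (SIOCINQ): listeners get -EINVAL, streams
 * return the cached inq_len counter, SOCK_SEQPACKET sums all queued skbs,
 * and datagram sockets report only the first queued datagram.
 */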
3244 long unix_inq_len(struct sock *sk)
3245 {
3246 	struct sk_buff *skb;
3247 	long amount = 0;
3248 
3249 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
3250 		return -EINVAL;
3251 
3252 	if (sk->sk_type == SOCK_STREAM)
3253 		return READ_ONCE(unix_sk(sk)->inq_len);
3254 
3255 	spin_lock(&sk->sk_receive_queue.lock);
3256 	if (sk->sk_type == SOCK_SEQPACKET) {
3257 		skb_queue_walk(&sk->sk_receive_queue, skb)
3258 			amount += unix_skb_len(skb);
3259 	} else {
3260 		skb = skb_peek(&sk->sk_receive_queue);
3261 		if (skb)
3262 			amount = skb->len;
3263 	}
3264 	spin_unlock(&sk->sk_receive_queue.lock);
3265 
3266 	return amount;
3267 }
3268 EXPORT_SYMBOL_GPL(unix_inq_len);
3269 
3270 long unix_outq_len(struct sock *sk)
3271 {
3272 	return sk_wmem_alloc_get(sk);
3273 }
3274 EXPORT_SYMBOL_GPL(unix_outq_len);
3275 
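/* SIOCUNIXFILE: hand a CAP_NET_ADMIN caller an O_PATH file descriptor
 * referring to the filesystem object this socket is bound to.
 */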
3276 static int unix_open_file(struct sock *sk)
3277 {
3278 	struct file *f;
3279 	int fd;
3280 
3281 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3282 		return -EPERM;
3283 
3284 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3285 		return -ENOENT;
3286 
3287 	if (!unix_sk(sk)->path.dentry)
3288 		return -ENOENT;
3289 
3290 	fd = get_unused_fd_flags(O_CLOEXEC);
3291 	if (fd < 0)
3292 		return fd;
3293 
3294 	f = dentry_open(&unix_sk(sk)->path, O_PATH, current_cred());
3295 	if (IS_ERR(f)) {
3296 		put_unused_fd(fd);
3297 		return PTR_ERR(f);
3298 	}
3299 
3300 	fd_install(fd, f);
3301 	return fd;
3302 }
3303 
3304 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3305 {
3306 	struct sock *sk = sock->sk;
3307 	long amount = 0;
3308 	int err;
3309 
3310 	switch (cmd) {
3311 	case SIOCOUTQ:
3312 		amount = unix_outq_len(sk);
3313 		err = put_user(amount, (int __user *)arg);
3314 		break;
3315 	case SIOCINQ:
3316 		amount = unix_inq_len(sk);
3317 		if (amount < 0)
3318 			err = amount;
3319 		else
3320 			err = put_user(amount, (int __user *)arg);
3321 		break;
3322 	case SIOCUNIXFILE:
3323 		err = unix_open_file(sk);
3324 		break;
3325 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3326 	case SIOCATMARK:
3327 		{
3328 			struct unix_sock *u = unix_sk(sk);
3329 			struct sk_buff *skb;
3330 			int answ = 0;
3331 
3332 			mutex_lock(&u->iolock);
3333 
3334 			skb = skb_peek(&sk->sk_receive_queue);
3335 			if (skb) {
3336 				struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
3337 				struct sk_buff *next_skb;
3338 
3339 				next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
3340 
3341 				if (skb == oob_skb ||
3342 				    (!unix_skb_len(skb) &&
3343 				     (!oob_skb || next_skb == oob_skb)))
3344 					answ = 1;
3345 			}
3346 
3347 			mutex_unlock(&u->iolock);
3348 
3349 			err = put_user(answ, (int __user *)arg);
3350 		}
3351 		break;
3352 #endif
3353 	default:
3354 		err = -ENOIOCTLCMD;
3355 		break;
3356 	}
3357 	return err;
3358 }
3359 
3360 #ifdef CONFIG_COMPAT
3361 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3362 {
3363 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3364 }
3365 #endif
3366 
3367 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3368 {
3369 	struct sock *sk = sock->sk;
3370 	unsigned char state;
3371 	__poll_t mask;
3372 	u8 shutdown;
3373 
3374 	sock_poll_wait(file, sock, wait);
3375 	mask = 0;
3376 	shutdown = READ_ONCE(sk->sk_shutdown);
3377 	state = READ_ONCE(sk->sk_state);
3378 
3379 	/* exceptional events? */
3380 	if (READ_ONCE(sk->sk_err))
3381 		mask |= EPOLLERR;
3382 	if (shutdown == SHUTDOWN_MASK)
3383 		mask |= EPOLLHUP;
3384 	if (shutdown & RCV_SHUTDOWN)
3385 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3386 
3387 	/* readable? */
3388 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3389 		mask |= EPOLLIN | EPOLLRDNORM;
3390 	if (sk_is_readable(sk))
3391 		mask |= EPOLLIN | EPOLLRDNORM;
3392 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3393 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3394 		mask |= EPOLLPRI;
3395 #endif
3396 
3397 	/* Connection-based sockets need to check for termination and startup */
3398 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3399 	    state == TCP_CLOSE)
3400 		mask |= EPOLLHUP;
3401 
3402 	/*
3403 	 * We also set writable when the other side has shut down the
3404 	 * connection. This prevents stuck sockets.
3405 	 */
3406 	if (unix_writable(sk, state))
3407 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3408 
3409 	return mask;
3410 }
3411 
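/* Poll for datagram/SEQPACKET sockets: in addition to the unix_poll()
 * style checks it reports error-queue events and, when a connected
 * peer's receive queue is full, clears writability and hooks into the
 * peer's wait queue so the poller is woken once space frees up.
 */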
3412 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3413 				    poll_table *wait)
3414 {
3415 	struct sock *sk = sock->sk, *other;
3416 	unsigned int writable;
3417 	unsigned char state;
3418 	__poll_t mask;
3419 	u8 shutdown;
3420 
3421 	sock_poll_wait(file, sock, wait);
3422 	mask = 0;
3423 	shutdown = READ_ONCE(sk->sk_shutdown);
3424 	state = READ_ONCE(sk->sk_state);
3425 
3426 	/* exceptional events? */
3427 	if (READ_ONCE(sk->sk_err) ||
3428 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3429 		mask |= EPOLLERR |
3430 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3431 
3432 	if (shutdown & RCV_SHUTDOWN)
3433 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3434 	if (shutdown == SHUTDOWN_MASK)
3435 		mask |= EPOLLHUP;
3436 
3437 	/* readable? */
3438 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3439 		mask |= EPOLLIN | EPOLLRDNORM;
3440 	if (sk_is_readable(sk))
3441 		mask |= EPOLLIN | EPOLLRDNORM;
3442 
3443 	/* Connection-based sockets need to check for termination and startup */
3444 	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3445 		mask |= EPOLLHUP;
3446 
3447 	/* No write status requested, avoid expensive OUT tests. */
3448 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3449 		return mask;
3450 
3451 	writable = unix_writable(sk, state);
3452 	if (writable) {
3453 		unix_state_lock(sk);
3454 
3455 		other = unix_peer(sk);
3456 		if (other && unix_peer(other) != sk &&
3457 		    unix_recvq_full_lockless(other) &&
3458 		    unix_dgram_peer_wake_me(sk, other))
3459 			writable = 0;
3460 
3461 		unix_state_unlock(sk);
3462 	}
3463 
3464 	if (writable)
3465 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3466 	else
3467 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3468 
3469 	return mask;
3470 }
3471 
3472 #ifdef CONFIG_PROC_FS
3473 
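/* The /proc/net/unix iterator packs its position into the seq_file *pos:
 * the upper bits select the hash bucket and the low BUCKET_SPACE bits
 * hold a 1-based offset into that bucket.
 */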
3474 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3475 
3476 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3477 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3478 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3479 
3480 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3481 {
3482 	unsigned long offset = get_offset(*pos);
3483 	unsigned long bucket = get_bucket(*pos);
3484 	unsigned long count = 0;
3485 	struct sock *sk;
3486 
3487 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3488 	     sk; sk = sk_next(sk)) {
3489 		if (++count == offset)
3490 			break;
3491 	}
3492 
3493 	return sk;
3494 }
3495 
3496 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3497 {
3498 	unsigned long bucket = get_bucket(*pos);
3499 	struct net *net = seq_file_net(seq);
3500 	struct sock *sk;
3501 
3502 	while (bucket < UNIX_HASH_SIZE) {
3503 		spin_lock(&net->unx.table.locks[bucket]);
3504 
3505 		sk = unix_from_bucket(seq, pos);
3506 		if (sk)
3507 			return sk;
3508 
3509 		spin_unlock(&net->unx.table.locks[bucket]);
3510 
3511 		*pos = set_bucket_offset(++bucket, 1);
3512 	}
3513 
3514 	return NULL;
3515 }
3516 
3517 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3518 				  loff_t *pos)
3519 {
3520 	unsigned long bucket = get_bucket(*pos);
3521 
3522 	sk = sk_next(sk);
3523 	if (sk)
3524 		return sk;
3525 
3526 
3527 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3528 
3529 	*pos = set_bucket_offset(++bucket, 1);
3530 
3531 	return unix_get_first(seq, pos);
3532 }
3533 
3534 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3535 {
3536 	if (!*pos)
3537 		return SEQ_START_TOKEN;
3538 
3539 	return unix_get_first(seq, pos);
3540 }
3541 
3542 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3543 {
3544 	++*pos;
3545 
3546 	if (v == SEQ_START_TOKEN)
3547 		return unix_get_first(seq, pos);
3548 
3549 	return unix_get_next(seq, v, pos);
3550 }
3551 
3552 static void unix_seq_stop(struct seq_file *seq, void *v)
3553 {
3554 	struct sock *sk = v;
3555 
3556 	if (sk)
3557 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3558 }
3559 
3560 static int unix_seq_show(struct seq_file *seq, void *v)
3561 {
3562 
3563 	if (v == SEQ_START_TOKEN)
3564 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3565 			 "Inode Path\n");
3566 	else {
3567 		struct sock *s = v;
3568 		struct unix_sock *u = unix_sk(s);
3569 		unix_state_lock(s);
3570 
3571 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3572 			s,
3573 			refcount_read(&s->sk_refcnt),
3574 			0,
3575 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3576 			s->sk_type,
3577 			s->sk_socket ?
3578 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3579 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3580 			sock_i_ino(s));
3581 
3582 		if (u->addr) {	// under a hash table lock here
3583 			int i, len;
3584 			seq_putc(seq, ' ');
3585 
3586 			i = 0;
3587 			len = u->addr->len -
3588 				offsetof(struct sockaddr_un, sun_path);
3589 			if (u->addr->name->sun_path[0]) {
3590 				len--;
3591 			} else {
3592 				seq_putc(seq, '@');
3593 				i++;
3594 			}
3595 			for ( ; i < len; i++)
3596 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3597 					 '@');
3598 		}
3599 		unix_state_unlock(s);
3600 		seq_putc(seq, '\n');
3601 	}
3602 
3603 	return 0;
3604 }
3605 
3606 static const struct seq_operations unix_seq_ops = {
3607 	.start  = unix_seq_start,
3608 	.next   = unix_seq_next,
3609 	.stop   = unix_seq_stop,
3610 	.show   = unix_seq_show,
3611 };
3612 
3613 #ifdef CONFIG_BPF_SYSCALL
3614 struct bpf_unix_iter_state {
3615 	struct seq_net_private p;
3616 	unsigned int cur_sk;
3617 	unsigned int end_sk;
3618 	unsigned int max_sk;
3619 	struct sock **batch;
3620 	bool st_bucket_done;
3621 };
3622 
3623 struct bpf_iter__unix {
3624 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3625 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3626 	uid_t uid __aligned(8);
3627 };
3628 
3629 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3630 			      struct unix_sock *unix_sk, uid_t uid)
3631 {
3632 	struct bpf_iter__unix ctx;
3633 
3634 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3635 	ctx.meta = meta;
3636 	ctx.unix_sk = unix_sk;
3637 	ctx.uid = uid;
3638 	return bpf_iter_run_prog(prog, &ctx);
3639 }
3640 
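/* Take references on as many sockets from the current bucket as the batch
 * array can hold so the bucket lock can be dropped while BPF programs run.
 * The return value is how many sockets the bucket actually contained,
 * letting the caller detect a too-small batch and retry with a larger one.
 */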
3641 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3642 
3643 {
3644 	struct bpf_unix_iter_state *iter = seq->private;
3645 	unsigned int expected = 1;
3646 	struct sock *sk;
3647 
3648 	sock_hold(start_sk);
3649 	iter->batch[iter->end_sk++] = start_sk;
3650 
3651 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3652 		if (iter->end_sk < iter->max_sk) {
3653 			sock_hold(sk);
3654 			iter->batch[iter->end_sk++] = sk;
3655 		}
3656 
3657 		expected++;
3658 	}
3659 
3660 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3661 
3662 	return expected;
3663 }
3664 
3665 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3666 {
3667 	while (iter->cur_sk < iter->end_sk)
3668 		sock_put(iter->batch[iter->cur_sk++]);
3669 }
3670 
3671 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3672 				       unsigned int new_batch_sz)
3673 {
3674 	struct sock **new_batch;
3675 
3676 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3677 			     GFP_USER | __GFP_NOWARN);
3678 	if (!new_batch)
3679 		return -ENOMEM;
3680 
3681 	bpf_iter_unix_put_batch(iter);
3682 	kvfree(iter->batch);
3683 	iter->batch = new_batch;
3684 	iter->max_sk = new_batch_sz;
3685 
3686 	return 0;
3687 }
3688 
3689 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3690 					loff_t *pos)
3691 {
3692 	struct bpf_unix_iter_state *iter = seq->private;
3693 	unsigned int expected;
3694 	bool resized = false;
3695 	struct sock *sk;
3696 
3697 	if (iter->st_bucket_done)
3698 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3699 
3700 again:
3701 	/* Get a new batch */
3702 	iter->cur_sk = 0;
3703 	iter->end_sk = 0;
3704 
3705 	sk = unix_get_first(seq, pos);
3706 	if (!sk)
3707 		return NULL; /* Done */
3708 
3709 	expected = bpf_iter_unix_hold_batch(seq, sk);
3710 
3711 	if (iter->end_sk == expected) {
3712 		iter->st_bucket_done = true;
3713 		return sk;
3714 	}
3715 
3716 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3717 		resized = true;
3718 		goto again;
3719 	}
3720 
3721 	return sk;
3722 }
3723 
3724 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3725 {
3726 	if (!*pos)
3727 		return SEQ_START_TOKEN;
3728 
3729 	/* bpf iter does not support lseek, so it always
3730 	 * continues from where it was stop()-ped.
3731 	 */
3732 	return bpf_iter_unix_batch(seq, pos);
3733 }
3734 
3735 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3736 {
3737 	struct bpf_unix_iter_state *iter = seq->private;
3738 	struct sock *sk;
3739 
3740 	/* Whenever seq_next() is called, iter->cur_sk has already
3741 	 * been consumed by seq_show(), so advance to the next sk in
3742 	 * the batch.
3743 	 */
3744 	if (iter->cur_sk < iter->end_sk)
3745 		sock_put(iter->batch[iter->cur_sk++]);
3746 
3747 	++*pos;
3748 
3749 	if (iter->cur_sk < iter->end_sk)
3750 		sk = iter->batch[iter->cur_sk];
3751 	else
3752 		sk = bpf_iter_unix_batch(seq, pos);
3753 
3754 	return sk;
3755 }
3756 
3757 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3758 {
3759 	struct bpf_iter_meta meta;
3760 	struct bpf_prog *prog;
3761 	struct sock *sk = v;
3762 	uid_t uid;
3763 	bool slow;
3764 	int ret;
3765 
3766 	if (v == SEQ_START_TOKEN)
3767 		return 0;
3768 
3769 	slow = lock_sock_fast(sk);
3770 
3771 	if (unlikely(sk_unhashed(sk))) {
3772 		ret = SEQ_SKIP;
3773 		goto unlock;
3774 	}
3775 
3776 	uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
3777 	meta.seq = seq;
3778 	prog = bpf_iter_get_info(&meta, false);
3779 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3780 unlock:
3781 	unlock_sock_fast(sk, slow);
3782 	return ret;
3783 }
3784 
3785 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3786 {
3787 	struct bpf_unix_iter_state *iter = seq->private;
3788 	struct bpf_iter_meta meta;
3789 	struct bpf_prog *prog;
3790 
3791 	if (!v) {
3792 		meta.seq = seq;
3793 		prog = bpf_iter_get_info(&meta, true);
3794 		if (prog)
3795 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3796 	}
3797 
3798 	if (iter->cur_sk < iter->end_sk)
3799 		bpf_iter_unix_put_batch(iter);
3800 }
3801 
3802 static const struct seq_operations bpf_iter_unix_seq_ops = {
3803 	.start	= bpf_iter_unix_seq_start,
3804 	.next	= bpf_iter_unix_seq_next,
3805 	.stop	= bpf_iter_unix_seq_stop,
3806 	.show	= bpf_iter_unix_seq_show,
3807 };
3808 #endif
3809 #endif
3810 
3811 static const struct net_proto_family unix_family_ops = {
3812 	.family = PF_UNIX,
3813 	.create = unix_create,
3814 	.owner	= THIS_MODULE,
3815 };
3816 
3817 
3818 static int __net_init unix_net_init(struct net *net)
3819 {
3820 	int i;
3821 
3822 	net->unx.sysctl_max_dgram_qlen = 10;
3823 	if (unix_sysctl_register(net))
3824 		goto out;
3825 
3826 #ifdef CONFIG_PROC_FS
3827 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3828 			     sizeof(struct seq_net_private)))
3829 		goto err_sysctl;
3830 #endif
3831 
3832 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3833 					      sizeof(spinlock_t), GFP_KERNEL);
3834 	if (!net->unx.table.locks)
3835 		goto err_proc;
3836 
3837 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3838 						sizeof(struct hlist_head),
3839 						GFP_KERNEL);
3840 	if (!net->unx.table.buckets)
3841 		goto free_locks;
3842 
3843 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3844 		spin_lock_init(&net->unx.table.locks[i]);
3845 		lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
3846 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3847 	}
3848 
3849 	return 0;
3850 
3851 free_locks:
3852 	kvfree(net->unx.table.locks);
3853 err_proc:
3854 #ifdef CONFIG_PROC_FS
3855 	remove_proc_entry("unix", net->proc_net);
3856 err_sysctl:
3857 #endif
3858 	unix_sysctl_unregister(net);
3859 out:
3860 	return -ENOMEM;
3861 }
3862 
3863 static void __net_exit unix_net_exit(struct net *net)
3864 {
3865 	kvfree(net->unx.table.buckets);
3866 	kvfree(net->unx.table.locks);
3867 	unix_sysctl_unregister(net);
3868 	remove_proc_entry("unix", net->proc_net);
3869 }
3870 
3871 static struct pernet_operations unix_net_ops = {
3872 	.init = unix_net_init,
3873 	.exit = unix_net_exit,
3874 };
3875 
3876 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3877 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3878 		     struct unix_sock *unix_sk, uid_t uid)
3879 
3880 #define INIT_BATCH_SZ 16
3881 
3882 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3883 {
3884 	struct bpf_unix_iter_state *iter = priv_data;
3885 	int err;
3886 
3887 	err = bpf_iter_init_seq_net(priv_data, aux);
3888 	if (err)
3889 		return err;
3890 
3891 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3892 	if (err) {
3893 		bpf_iter_fini_seq_net(priv_data);
3894 		return err;
3895 	}
3896 
3897 	return 0;
3898 }
3899 
3900 static void bpf_iter_fini_unix(void *priv_data)
3901 {
3902 	struct bpf_unix_iter_state *iter = priv_data;
3903 
3904 	bpf_iter_fini_seq_net(priv_data);
3905 	kvfree(iter->batch);
3906 }
3907 
3908 static const struct bpf_iter_seq_info unix_seq_info = {
3909 	.seq_ops		= &bpf_iter_unix_seq_ops,
3910 	.init_seq_private	= bpf_iter_init_unix,
3911 	.fini_seq_private	= bpf_iter_fini_unix,
3912 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3913 };
3914 
3915 static const struct bpf_func_proto *
3916 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3917 			     const struct bpf_prog *prog)
3918 {
3919 	switch (func_id) {
3920 	case BPF_FUNC_setsockopt:
3921 		return &bpf_sk_setsockopt_proto;
3922 	case BPF_FUNC_getsockopt:
3923 		return &bpf_sk_getsockopt_proto;
3924 	default:
3925 		return NULL;
3926 	}
3927 }
3928 
3929 static struct bpf_iter_reg unix_reg_info = {
3930 	.target			= "unix",
3931 	.ctx_arg_info_size	= 1,
3932 	.ctx_arg_info		= {
3933 		{ offsetof(struct bpf_iter__unix, unix_sk),
3934 		  PTR_TO_BTF_ID_OR_NULL },
3935 	},
3936 	.get_func_proto         = bpf_iter_unix_get_func_proto,
3937 	.seq_info		= &unix_seq_info,
3938 };
3939 
3940 static void __init bpf_iter_register(void)
3941 {
3942 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3943 	if (bpf_iter_reg_target(&unix_reg_info))
3944 		pr_warn("Warning: could not register bpf iterator unix\n");
3945 }
3946 #endif
3947 
3948 static int __init af_unix_init(void)
3949 {
3950 	int i, rc = -1;
3951 
3952 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3953 
3954 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3955 		spin_lock_init(&bsd_socket_locks[i]);
3956 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3957 	}
3958 
3959 	rc = proto_register(&unix_dgram_proto, 1);
3960 	if (rc != 0) {
3961 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3962 		goto out;
3963 	}
3964 
3965 	rc = proto_register(&unix_stream_proto, 1);
3966 	if (rc != 0) {
3967 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3968 		proto_unregister(&unix_dgram_proto);
3969 		goto out;
3970 	}
3971 
3972 	sock_register(&unix_family_ops);
3973 	register_pernet_subsys(&unix_net_ops);
3974 	unix_bpf_build_proto();
3975 
3976 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3977 	bpf_iter_register();
3978 #endif
3979 
3980 out:
3981 	return rc;
3982 }
3983 
3984 /* Later than subsys_initcall() because we depend on stuff initialised there */
3985 fs_initcall(af_unix_init);
3986