xref: /linux/net/unix/af_unix.c (revision f7c595c9d9f4cce9ec335f0d3c5d875bb547f9d5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko EiBfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge amount
34  *					of socks hashed (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
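
/* Illustrative userspace sketch (not part of this file): binding to an
 * "abstract" address.  The name begins with a NUL byte, is compared by
 * length rather than by string termination, and never touches the
 * filesystem.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	a.sun_path[0] = '\0';
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */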
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/bpf-cgroup.h>
81 #include <linux/btf_ids.h>
82 #include <linux/dcache.h>
83 #include <linux/errno.h>
84 #include <linux/fcntl.h>
85 #include <linux/file.h>
86 #include <linux/filter.h>
87 #include <linux/fs.h>
88 #include <linux/fs_struct.h>
89 #include <linux/init.h>
90 #include <linux/kernel.h>
91 #include <linux/mount.h>
92 #include <linux/namei.h>
93 #include <linux/net.h>
94 #include <linux/pidfs.h>
95 #include <linux/poll.h>
96 #include <linux/proc_fs.h>
97 #include <linux/sched/signal.h>
98 #include <linux/security.h>
99 #include <linux/seq_file.h>
100 #include <linux/skbuff.h>
101 #include <linux/slab.h>
102 #include <linux/socket.h>
103 #include <linux/splice.h>
104 #include <linux/string.h>
105 #include <linux/uaccess.h>
106 #include <net/af_unix.h>
107 #include <net/net_namespace.h>
108 #include <net/scm.h>
109 #include <net/tcp_states.h>
110 #include <uapi/linux/sockios.h>
111 #include <uapi/linux/termios.h>
112 
113 #include "af_unix.h"
114 
115 static atomic_long_t unix_nr_socks;
116 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
117 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
118 
119 /* SMP locking strategy:
120  *    hash table is protected with spinlock.
121  *    each socket state is protected by separate spinlock.
122  */
123 #ifdef CONFIG_PROVE_LOCKING
124 #define cmp_ptr(l, r)	(((l) > (r)) - ((l) < (r)))
125 
126 static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
127 				  const struct lockdep_map *b)
128 {
129 	return cmp_ptr(a, b);
130 }
131 
132 static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
133 				  const struct lockdep_map *_b)
134 {
135 	const struct unix_sock *a, *b;
136 
137 	a = container_of(_a, struct unix_sock, lock.dep_map);
138 	b = container_of(_b, struct unix_sock, lock.dep_map);
139 
140 	if (a->sk.sk_state == TCP_LISTEN) {
141 		/* unix_stream_connect(): Before the 2nd unix_state_lock(),
142 		 *
143 		 *   1. a is TCP_LISTEN.
144 		 *   2. b is not a.
145 		 *   3. concurrent connect(b -> a) must fail.
146 		 *
147 		 * Except for 2. & 3., the b's state can be any possible
148 		 * value due to concurrent connect() or listen().
149 		 *
150 		 * 2. is detected in debug_spin_lock_before(), and 3. cannot
151 		 * be expressed as lock_cmp_fn.
152 		 */
153 		switch (b->sk.sk_state) {
154 		case TCP_CLOSE:
155 		case TCP_ESTABLISHED:
156 		case TCP_LISTEN:
157 			return -1;
158 		default:
159 			/* Invalid case. */
160 			return 0;
161 		}
162 	}
163 
164 	/* Should never happen.  Just to be symmetric. */
165 	if (b->sk.sk_state == TCP_LISTEN) {
166 		switch (a->sk.sk_state) {
167 		case TCP_CLOSE:
168 		case TCP_ESTABLISHED:
169 			return 1;
170 		default:
171 			return 0;
172 		}
173 	}
174 
175 	/* unix_state_double_lock(): ascending address order. */
176 	return cmp_ptr(a, b);
177 }
178 
179 static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
180 				  const struct lockdep_map *_b)
181 {
182 	const struct sock *a, *b;
183 
184 	a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
185 	b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
186 
187 	/* unix_collect_skb(): listener -> embryo order. */
188 	if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
189 		return -1;
190 
191 	/* Should never happen.  Just to be symmetric. */
192 	if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
193 		return 1;
194 
195 	return 0;
196 }
197 #endif
198 
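/* Note on the hash layout used below: unix_unbound_hash() and
 * unix_bsd_hash() both mask with UNIX_HASH_MOD and therefore land in the
 * lower half of the per-netns table, while unix_abstract_hash() returns
 * UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD), i.e. the upper half, so
 * unbound and pathname-bound sockets never share a bucket with abstract
 * ones.  bsd_socket_buckets[] above is a separate table keyed by inode
 * number and is only used by unix_find_socket_byinode().
 */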
199 static unsigned int unix_unbound_hash(struct sock *sk)
200 {
201 	unsigned long hash = (unsigned long)sk;
202 
203 	hash ^= hash >> 16;
204 	hash ^= hash >> 8;
205 	hash ^= sk->sk_type;
206 
207 	return hash & UNIX_HASH_MOD;
208 }
209 
210 static unsigned int unix_bsd_hash(struct inode *i)
211 {
212 	return i->i_ino & UNIX_HASH_MOD;
213 }
214 
215 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
216 				       int addr_len, int type)
217 {
218 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
219 	unsigned int hash;
220 
221 	hash = (__force unsigned int)csum_fold(csum);
222 	hash ^= hash >> 8;
223 	hash ^= type;
224 
225 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
226 }
227 
228 static void unix_table_double_lock(struct net *net,
229 				   unsigned int hash1, unsigned int hash2)
230 {
231 	if (hash1 == hash2) {
232 		spin_lock(&net->unx.table.locks[hash1]);
233 		return;
234 	}
235 
236 	if (hash1 > hash2)
237 		swap(hash1, hash2);
238 
239 	spin_lock(&net->unx.table.locks[hash1]);
240 	spin_lock(&net->unx.table.locks[hash2]);
241 }
242 
243 static void unix_table_double_unlock(struct net *net,
244 				     unsigned int hash1, unsigned int hash2)
245 {
246 	if (hash1 == hash2) {
247 		spin_unlock(&net->unx.table.locks[hash1]);
248 		return;
249 	}
250 
251 	spin_unlock(&net->unx.table.locks[hash1]);
252 	spin_unlock(&net->unx.table.locks[hash2]);
253 }
254 
255 #ifdef CONFIG_SECURITY_NETWORK
256 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
257 {
258 	UNIXCB(skb).secid = scm->secid;
259 }
260 
261 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
262 {
263 	scm->secid = UNIXCB(skb).secid;
264 }
265 
266 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
267 {
268 	return (scm->secid == UNIXCB(skb).secid);
269 }
270 #else
271 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
272 { }
273 
274 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
275 { }
276 
277 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
278 {
279 	return true;
280 }
281 #endif /* CONFIG_SECURITY_NETWORK */
282 
283 static inline int unix_may_send(struct sock *sk, struct sock *osk)
284 {
285 	return !unix_peer(osk) || unix_peer(osk) == sk;
286 }
287 
288 static inline int unix_recvq_full_lockless(const struct sock *sk)
289 {
290 	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
291 }
292 
293 struct sock *unix_peer_get(struct sock *s)
294 {
295 	struct sock *peer;
296 
297 	unix_state_lock(s);
298 	peer = unix_peer(s);
299 	if (peer)
300 		sock_hold(peer);
301 	unix_state_unlock(s);
302 	return peer;
303 }
304 EXPORT_SYMBOL_GPL(unix_peer_get);
305 
306 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
307 					     int addr_len)
308 {
309 	struct unix_address *addr;
310 
311 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
312 	if (!addr)
313 		return NULL;
314 
315 	refcount_set(&addr->refcnt, 1);
316 	addr->len = addr_len;
317 	memcpy(addr->name, sunaddr, addr_len);
318 
319 	return addr;
320 }
321 
322 static inline void unix_release_addr(struct unix_address *addr)
323 {
324 	if (refcount_dec_and_test(&addr->refcnt))
325 		kfree(addr);
326 }
327 
328 /*
329  *	Check unix socket name:
330  *		- it should not be zero length.
331  *	        - if it does not start with a zero byte, it should be NULL terminated (FS object)
332  *		- if it starts with a zero byte, it is an abstract name.
333  */
334 
335 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
336 {
337 	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
338 	    addr_len > sizeof(*sunaddr))
339 		return -EINVAL;
340 
341 	if (sunaddr->sun_family != AF_UNIX)
342 		return -EINVAL;
343 
344 	return 0;
345 }
346 
347 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
348 {
349 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
350 	short offset = offsetof(struct sockaddr_storage, __data);
351 
352 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
353 
354 	/* This may look like an off by one error but it is a bit more
355 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
356 	 * sun_path[108] doesn't as such exist.  However in kernel space
357 	 * we are guaranteed that it is a valid memory location in our
358 	 * kernel address buffer because syscall functions always pass
359 	 * a pointer of struct sockaddr_storage which has a bigger buffer
360 	 * than 108.  Also, we must terminate sun_path for strlen() in
361 	 * getname_kernel().
362 	 */
363 	addr->__data[addr_len - offset] = 0;
364 
365 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
366 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
367 	 * know the actual buffer.
368 	 */
369 	return strlen(addr->__data) + offset + 1;
370 }
371 
372 static void __unix_remove_socket(struct sock *sk)
373 {
374 	sk_del_node_init(sk);
375 }
376 
377 static void __unix_insert_socket(struct net *net, struct sock *sk)
378 {
379 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
380 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
381 }
382 
383 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
384 				 struct unix_address *addr, unsigned int hash)
385 {
386 	__unix_remove_socket(sk);
387 	smp_store_release(&unix_sk(sk)->addr, addr);
388 
389 	sk->sk_hash = hash;
390 	__unix_insert_socket(net, sk);
391 }
392 
393 static void unix_remove_socket(struct net *net, struct sock *sk)
394 {
395 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
396 	__unix_remove_socket(sk);
397 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
398 }
399 
400 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
401 {
402 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
403 	__unix_insert_socket(net, sk);
404 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
405 }
406 
407 static void unix_insert_bsd_socket(struct sock *sk)
408 {
409 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
410 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
411 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
412 }
413 
414 static void unix_remove_bsd_socket(struct sock *sk)
415 {
416 	if (!hlist_unhashed(&sk->sk_bind_node)) {
417 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
418 		__sk_del_bind_node(sk);
419 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
420 
421 		sk_node_init(&sk->sk_bind_node);
422 	}
423 }
424 
425 static struct sock *__unix_find_socket_byname(struct net *net,
426 					      struct sockaddr_un *sunname,
427 					      int len, unsigned int hash)
428 {
429 	struct sock *s;
430 
431 	sk_for_each(s, &net->unx.table.buckets[hash]) {
432 		struct unix_sock *u = unix_sk(s);
433 
434 		if (u->addr->len == len &&
435 		    !memcmp(u->addr->name, sunname, len))
436 			return s;
437 	}
438 	return NULL;
439 }
440 
441 static inline struct sock *unix_find_socket_byname(struct net *net,
442 						   struct sockaddr_un *sunname,
443 						   int len, unsigned int hash)
444 {
445 	struct sock *s;
446 
447 	spin_lock(&net->unx.table.locks[hash]);
448 	s = __unix_find_socket_byname(net, sunname, len, hash);
449 	if (s)
450 		sock_hold(s);
451 	spin_unlock(&net->unx.table.locks[hash]);
452 	return s;
453 }
454 
455 static struct sock *unix_find_socket_byinode(struct inode *i)
456 {
457 	unsigned int hash = unix_bsd_hash(i);
458 	struct sock *s;
459 
460 	spin_lock(&bsd_socket_locks[hash]);
461 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
462 		struct dentry *dentry = unix_sk(s)->path.dentry;
463 
464 		if (dentry && d_backing_inode(dentry) == i) {
465 			sock_hold(s);
466 			spin_unlock(&bsd_socket_locks[hash]);
467 			return s;
468 		}
469 	}
470 	spin_unlock(&bsd_socket_locks[hash]);
471 	return NULL;
472 }
473 
474 /* Support code for asymmetrically connected dgram sockets
475  *
476  * If a datagram socket is connected to a socket not itself connected
477  * to the first socket (eg, /dev/log), clients may only enqueue more
478  * messages if the present receive queue of the server socket is not
479  * "too large". This means there's a second writeability condition
480  * poll and sendmsg need to test. The dgram recv code will do a wake
481  * up on the peer_wait wait queue of a socket upon reception of a
482  * datagram which needs to be propagated to sleeping would-be writers
483  * since these might not have sent anything so far. This can't be
484  * accomplished via poll_wait because the lifetime of the server
485  * socket might be less than that of its clients if these break their
486  * association with it or if the server socket is closed while clients
487  * are still connected to it and there's no way to inform "a polling
488  * implementation" that it should let go of a certain wait queue
489  *
490  * In order to propagate a wake up, a wait_queue_entry_t of the client
491  * socket is enqueued on the peer_wait queue of the server socket
492  * whose wake function does a wake_up on the ordinary client socket
493  * wait queue. This connection is established whenever a write (or
494  * poll for write) hit the flow control condition and broken when the
495  * association to the server socket is dissolved or after a wake up
496  * was relayed.
497  */
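
/* Userspace view of the above (illustrative sketch, not part of this file):
 * a connected datagram sender that waits for writability instead of
 * spinning on -EAGAIN when the receiver's queue is full.  The wakeup it
 * waits for is the one relayed through the peer_wait mechanism below.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	connect(fd, (struct sockaddr *)&srv, srv_len);
 *	while (send(fd, buf, len, MSG_DONTWAIT) < 0 && errno == EAGAIN)
 *		poll(&pfd, 1, -1);
 */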
498 
499 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
500 				      void *key)
501 {
502 	struct unix_sock *u;
503 	wait_queue_head_t *u_sleep;
504 
505 	u = container_of(q, struct unix_sock, peer_wake);
506 
507 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
508 			    q);
509 	u->peer_wake.private = NULL;
510 
511 	/* relaying can only happen while the wq still exists */
512 	u_sleep = sk_sleep(&u->sk);
513 	if (u_sleep)
514 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
515 
516 	return 0;
517 }
518 
519 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
520 {
521 	struct unix_sock *u, *u_other;
522 	int rc;
523 
524 	u = unix_sk(sk);
525 	u_other = unix_sk(other);
526 	rc = 0;
527 	spin_lock(&u_other->peer_wait.lock);
528 
529 	if (!u->peer_wake.private) {
530 		u->peer_wake.private = other;
531 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
532 
533 		rc = 1;
534 	}
535 
536 	spin_unlock(&u_other->peer_wait.lock);
537 	return rc;
538 }
539 
540 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
541 					    struct sock *other)
542 {
543 	struct unix_sock *u, *u_other;
544 
545 	u = unix_sk(sk);
546 	u_other = unix_sk(other);
547 	spin_lock(&u_other->peer_wait.lock);
548 
549 	if (u->peer_wake.private == other) {
550 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
551 		u->peer_wake.private = NULL;
552 	}
553 
554 	spin_unlock(&u_other->peer_wait.lock);
555 }
556 
557 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
558 						   struct sock *other)
559 {
560 	unix_dgram_peer_wake_disconnect(sk, other);
561 	wake_up_interruptible_poll(sk_sleep(sk),
562 				   EPOLLOUT |
563 				   EPOLLWRNORM |
564 				   EPOLLWRBAND);
565 }
566 
567 /* preconditions:
568  *	- unix_peer(sk) == other
569  *	- association is stable
570  */
571 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
572 {
573 	int connected;
574 
575 	connected = unix_dgram_peer_wake_connect(sk, other);
576 
577 	/* If other is SOCK_DEAD, we want to make sure we signal
578 	 * POLLOUT, such that a subsequent write() can get a
579 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
580 	 * to other and it's full, we will hang waiting for POLLOUT.
581 	 */
582 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
583 		return 1;
584 
585 	if (connected)
586 		unix_dgram_peer_wake_disconnect(sk, other);
587 
588 	return 0;
589 }
590 
591 static int unix_writable(const struct sock *sk, unsigned char state)
592 {
593 	return state != TCP_LISTEN &&
594 		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
595 }
596 
597 static void unix_write_space(struct sock *sk)
598 {
599 	struct socket_wq *wq;
600 
601 	rcu_read_lock();
602 	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
603 		wq = rcu_dereference(sk->sk_wq);
604 		if (skwq_has_sleeper(wq))
605 			wake_up_interruptible_sync_poll(&wq->wait,
606 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
607 		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
608 	}
609 	rcu_read_unlock();
610 }
611 
612 /* When a dgram socket disconnects (or changes its peer), we clear its
613  * receive queue of packets that arrived from the previous peer. First, it
614  * allows us to do flow control based only on wmem_alloc; second, a sk
615  * connected to a peer may receive messages only from that peer. */
616 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
617 {
618 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
619 		skb_queue_purge_reason(&sk->sk_receive_queue,
620 				       SKB_DROP_REASON_UNIX_DISCONNECT);
621 
622 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
623 
624 		/* If one link of a bidirectional dgram pipe is disconnected,
625 		 * we signal an error. Messages are lost. Do not do this
626 		 * when the peer was not connected to us.
627 		 */
628 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
629 			WRITE_ONCE(other->sk_err, ECONNRESET);
630 			sk_error_report(other);
631 		}
632 	}
633 }
634 
635 static void unix_sock_destructor(struct sock *sk)
636 {
637 	struct unix_sock *u = unix_sk(sk);
638 
639 	skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);
640 
641 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
642 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
643 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
644 	if (!sock_flag(sk, SOCK_DEAD)) {
645 		pr_info("Attempt to release alive unix socket: %p\n", sk);
646 		return;
647 	}
648 
649 	if (sk->sk_peer_pid)
650 		pidfs_put_pid(sk->sk_peer_pid);
651 
652 	if (u->addr)
653 		unix_release_addr(u->addr);
654 
655 	atomic_long_dec(&unix_nr_socks);
656 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
657 #ifdef UNIX_REFCNT_DEBUG
658 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
659 		atomic_long_read(&unix_nr_socks));
660 #endif
661 }
662 
663 static unsigned int unix_skb_len(const struct sk_buff *skb)
664 {
665 	return skb->len - UNIXCB(skb).consumed;
666 }
667 
668 static void unix_release_sock(struct sock *sk, int embrion)
669 {
670 	struct unix_sock *u = unix_sk(sk);
671 	struct sock *skpair;
672 	struct sk_buff *skb;
673 	struct path path;
674 	int state;
675 
676 	unix_remove_socket(sock_net(sk), sk);
677 	unix_remove_bsd_socket(sk);
678 
679 	/* Clear state */
680 	unix_state_lock(sk);
681 	sock_orphan(sk);
682 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
683 	path	     = u->path;
684 	u->path.dentry = NULL;
685 	u->path.mnt = NULL;
686 	state = sk->sk_state;
687 	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
688 
689 	skpair = unix_peer(sk);
690 	unix_peer(sk) = NULL;
691 
692 	unix_state_unlock(sk);
693 
694 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
695 	u->oob_skb = NULL;
696 #endif
697 
698 	wake_up_interruptible_all(&u->peer_wait);
699 
700 	if (skpair != NULL) {
701 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
702 			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
703 
704 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
705 			if (skb && !unix_skb_len(skb))
706 				skb = skb_peek_next(skb, &sk->sk_receive_queue);
707 #endif
708 			unix_state_lock(skpair);
709 			/* No more writes */
710 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
711 			if (skb || embrion)
712 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
713 			unix_state_unlock(skpair);
714 			skpair->sk_state_change(skpair);
715 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
716 		}
717 
718 		unix_dgram_peer_wake_disconnect(sk, skpair);
719 		sock_put(skpair); /* It may now die */
720 	}
721 
722 	/* Try to flush out this socket. Throw out buffers at least */
723 
724 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
725 		if (state == TCP_LISTEN)
726 			unix_release_sock(skb->sk, 1);
727 
728 		/* passed fds are erased in the kfree_skb hook */
729 		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
730 	}
731 
732 	if (path.dentry)
733 		path_put(&path);
734 
735 	sock_put(sk);
736 
737 	/* ---- Socket is dead now and most probably destroyed ---- */
738 
739 	/*
740 	 * Fixme: BSD difference: In BSD all sockets connected to us get
741 	 *	  ECONNRESET and we die on the spot. In Linux we behave
742 	 *	  like files and pipes do and wait for the last
743 	 *	  dereference.
744 	 *
745 	 * Can't we simply set sock->err?
746 	 *
747 	 *	  What the above comment does talk about? --ANK(980817)
748 	 */
749 
750 	if (READ_ONCE(unix_tot_inflight))
751 		unix_gc();		/* Garbage collect fds */
752 }
753 
754 struct unix_peercred {
755 	struct pid *peer_pid;
756 	const struct cred *peer_cred;
757 };
758 
759 static inline int prepare_peercred(struct unix_peercred *peercred)
760 {
761 	struct pid *pid;
762 	int err;
763 
764 	pid = task_tgid(current);
765 	err = pidfs_register_pid(pid);
766 	if (likely(!err)) {
767 		peercred->peer_pid = get_pid(pid);
768 		peercred->peer_cred = get_current_cred();
769 	}
770 	return err;
771 }
772 
773 static void drop_peercred(struct unix_peercred *peercred)
774 {
775 	const struct cred *cred = NULL;
776 	struct pid *pid = NULL;
777 
778 	might_sleep();
779 
780 	swap(peercred->peer_pid, pid);
781 	swap(peercred->peer_cred, cred);
782 
783 	pidfs_put_pid(pid);
784 	put_pid(pid);
785 	put_cred(cred);
786 }
787 
788 static inline void init_peercred(struct sock *sk,
789 				 const struct unix_peercred *peercred)
790 {
791 	sk->sk_peer_pid = peercred->peer_pid;
792 	sk->sk_peer_cred = peercred->peer_cred;
793 }
794 
795 static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
796 {
797 	const struct cred *old_cred;
798 	struct pid *old_pid;
799 
800 	spin_lock(&sk->sk_peer_lock);
801 	old_pid = sk->sk_peer_pid;
802 	old_cred = sk->sk_peer_cred;
803 	init_peercred(sk, peercred);
804 	spin_unlock(&sk->sk_peer_lock);
805 
806 	peercred->peer_pid = old_pid;
807 	peercred->peer_cred = old_cred;
808 }
809 
810 static void copy_peercred(struct sock *sk, struct sock *peersk)
811 {
812 	lockdep_assert_held(&unix_sk(peersk)->lock);
813 
814 	spin_lock(&sk->sk_peer_lock);
815 	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
816 	pidfs_get_pid(sk->sk_peer_pid);
817 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
818 	spin_unlock(&sk->sk_peer_lock);
819 }
820 
821 static bool unix_may_passcred(const struct sock *sk)
822 {
823 	return sk->sk_scm_credentials || sk->sk_scm_pidfd;
824 }
825 
826 static int unix_listen(struct socket *sock, int backlog)
827 {
828 	int err;
829 	struct sock *sk = sock->sk;
830 	struct unix_sock *u = unix_sk(sk);
831 	struct unix_peercred peercred = {};
832 
833 	err = -EOPNOTSUPP;
834 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
835 		goto out;	/* Only stream/seqpacket sockets accept */
836 	err = -EINVAL;
837 	if (!READ_ONCE(u->addr))
838 		goto out;	/* No listens on an unbound socket */
839 	err = prepare_peercred(&peercred);
840 	if (err)
841 		goto out;
842 	unix_state_lock(sk);
843 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
844 		goto out_unlock;
845 	if (backlog > sk->sk_max_ack_backlog)
846 		wake_up_interruptible_all(&u->peer_wait);
847 	sk->sk_max_ack_backlog	= backlog;
848 	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
849 
850 	/* set credentials so connect can copy them */
851 	update_peercred(sk, &peercred);
852 	err = 0;
853 
854 out_unlock:
855 	unix_state_unlock(sk);
856 	drop_peercred(&peercred);
857 out:
858 	return err;
859 }
860 
861 static int unix_release(struct socket *);
862 static int unix_bind(struct socket *, struct sockaddr *, int);
863 static int unix_stream_connect(struct socket *, struct sockaddr *,
864 			       int addr_len, int flags);
865 static int unix_socketpair(struct socket *, struct socket *);
866 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
867 static int unix_getname(struct socket *, struct sockaddr *, int);
868 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
869 static __poll_t unix_dgram_poll(struct file *, struct socket *,
870 				    poll_table *);
871 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
872 #ifdef CONFIG_COMPAT
873 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
874 #endif
875 static int unix_shutdown(struct socket *, int);
876 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
877 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
878 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
879 				       struct pipe_inode_info *, size_t size,
880 				       unsigned int flags);
881 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
882 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
883 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
884 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
885 static int unix_dgram_connect(struct socket *, struct sockaddr *,
886 			      int, int);
887 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
888 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
889 				  int);
890 
891 #ifdef CONFIG_PROC_FS
892 static int unix_count_nr_fds(struct sock *sk)
893 {
894 	struct sk_buff *skb;
895 	struct unix_sock *u;
896 	int nr_fds = 0;
897 
898 	spin_lock(&sk->sk_receive_queue.lock);
899 	skb = skb_peek(&sk->sk_receive_queue);
900 	while (skb) {
901 		u = unix_sk(skb->sk);
902 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
903 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
904 	}
905 	spin_unlock(&sk->sk_receive_queue.lock);
906 
907 	return nr_fds;
908 }
909 
910 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
911 {
912 	struct sock *sk = sock->sk;
913 	unsigned char s_state;
914 	struct unix_sock *u;
915 	int nr_fds = 0;
916 
917 	if (sk) {
918 		s_state = READ_ONCE(sk->sk_state);
919 		u = unix_sk(sk);
920 
921 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
922 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
923 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
924 		 */
925 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
926 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
927 		else if (s_state == TCP_LISTEN)
928 			nr_fds = unix_count_nr_fds(sk);
929 
930 		seq_printf(m, "scm_fds: %u\n", nr_fds);
931 	}
932 }
933 #else
934 #define unix_show_fdinfo NULL
935 #endif
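
/* For reference: the scm_fds value computed by unix_show_fdinfo() above is
 * exported through procfs; "cat /proc/<pid>/fdinfo/<fd>" on an AF_UNIX
 * socket prints a line such as "scm_fds: 3", i.e. the number of SCM_RIGHTS
 * file descriptors queued on the socket but not yet received.
 */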
936 
937 static bool unix_custom_sockopt(int optname)
938 {
939 	switch (optname) {
940 	case SO_INQ:
941 		return true;
942 	default:
943 		return false;
944 	}
945 }
946 
947 static int unix_setsockopt(struct socket *sock, int level, int optname,
948 			   sockptr_t optval, unsigned int optlen)
949 {
950 	struct unix_sock *u = unix_sk(sock->sk);
951 	struct sock *sk = sock->sk;
952 	int val;
953 
954 	if (level != SOL_SOCKET)
955 		return -EOPNOTSUPP;
956 
957 	if (!unix_custom_sockopt(optname))
958 		return sock_setsockopt(sock, level, optname, optval, optlen);
959 
960 	if (optlen != sizeof(int))
961 		return -EINVAL;
962 
963 	if (copy_from_sockptr(&val, optval, sizeof(val)))
964 		return -EFAULT;
965 
966 	switch (optname) {
967 	case SO_INQ:
968 		if (sk->sk_type != SOCK_STREAM)
969 			return -EINVAL;
970 
971 		if (val > 1 || val < 0)
972 			return -EINVAL;
973 
974 		WRITE_ONCE(u->recvmsg_inq, val);
975 		break;
976 	default:
977 		return -ENOPROTOOPT;
978 	}
979 
980 	return 0;
981 }
982 
983 static const struct proto_ops unix_stream_ops = {
984 	.family =	PF_UNIX,
985 	.owner =	THIS_MODULE,
986 	.release =	unix_release,
987 	.bind =		unix_bind,
988 	.connect =	unix_stream_connect,
989 	.socketpair =	unix_socketpair,
990 	.accept =	unix_accept,
991 	.getname =	unix_getname,
992 	.poll =		unix_poll,
993 	.ioctl =	unix_ioctl,
994 #ifdef CONFIG_COMPAT
995 	.compat_ioctl =	unix_compat_ioctl,
996 #endif
997 	.listen =	unix_listen,
998 	.shutdown =	unix_shutdown,
999 	.setsockopt =	unix_setsockopt,
1000 	.sendmsg =	unix_stream_sendmsg,
1001 	.recvmsg =	unix_stream_recvmsg,
1002 	.read_skb =	unix_stream_read_skb,
1003 	.mmap =		sock_no_mmap,
1004 	.splice_read =	unix_stream_splice_read,
1005 	.set_peek_off =	sk_set_peek_off,
1006 	.show_fdinfo =	unix_show_fdinfo,
1007 };
1008 
1009 static const struct proto_ops unix_dgram_ops = {
1010 	.family =	PF_UNIX,
1011 	.owner =	THIS_MODULE,
1012 	.release =	unix_release,
1013 	.bind =		unix_bind,
1014 	.connect =	unix_dgram_connect,
1015 	.socketpair =	unix_socketpair,
1016 	.accept =	sock_no_accept,
1017 	.getname =	unix_getname,
1018 	.poll =		unix_dgram_poll,
1019 	.ioctl =	unix_ioctl,
1020 #ifdef CONFIG_COMPAT
1021 	.compat_ioctl =	unix_compat_ioctl,
1022 #endif
1023 	.listen =	sock_no_listen,
1024 	.shutdown =	unix_shutdown,
1025 	.sendmsg =	unix_dgram_sendmsg,
1026 	.read_skb =	unix_read_skb,
1027 	.recvmsg =	unix_dgram_recvmsg,
1028 	.mmap =		sock_no_mmap,
1029 	.set_peek_off =	sk_set_peek_off,
1030 	.show_fdinfo =	unix_show_fdinfo,
1031 };
1032 
1033 static const struct proto_ops unix_seqpacket_ops = {
1034 	.family =	PF_UNIX,
1035 	.owner =	THIS_MODULE,
1036 	.release =	unix_release,
1037 	.bind =		unix_bind,
1038 	.connect =	unix_stream_connect,
1039 	.socketpair =	unix_socketpair,
1040 	.accept =	unix_accept,
1041 	.getname =	unix_getname,
1042 	.poll =		unix_dgram_poll,
1043 	.ioctl =	unix_ioctl,
1044 #ifdef CONFIG_COMPAT
1045 	.compat_ioctl =	unix_compat_ioctl,
1046 #endif
1047 	.listen =	unix_listen,
1048 	.shutdown =	unix_shutdown,
1049 	.sendmsg =	unix_seqpacket_sendmsg,
1050 	.recvmsg =	unix_seqpacket_recvmsg,
1051 	.mmap =		sock_no_mmap,
1052 	.set_peek_off =	sk_set_peek_off,
1053 	.show_fdinfo =	unix_show_fdinfo,
1054 };
1055 
1056 static void unix_close(struct sock *sk, long timeout)
1057 {
1058 	/* Nothing to do here, unix socket does not need a ->close().
1059 	 * This is merely for sockmap.
1060 	 */
1061 }
1062 
1063 static bool unix_bpf_bypass_getsockopt(int level, int optname)
1064 {
1065 	if (level == SOL_SOCKET) {
1066 		switch (optname) {
1067 		case SO_PEERPIDFD:
1068 			return true;
1069 		default:
1070 			return false;
1071 		}
1072 	}
1073 
1074 	return false;
1075 }
1076 
1077 struct proto unix_dgram_proto = {
1078 	.name			= "UNIX",
1079 	.owner			= THIS_MODULE,
1080 	.obj_size		= sizeof(struct unix_sock),
1081 	.close			= unix_close,
1082 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1083 #ifdef CONFIG_BPF_SYSCALL
1084 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
1085 #endif
1086 };
1087 
1088 struct proto unix_stream_proto = {
1089 	.name			= "UNIX-STREAM",
1090 	.owner			= THIS_MODULE,
1091 	.obj_size		= sizeof(struct unix_sock),
1092 	.close			= unix_close,
1093 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1094 #ifdef CONFIG_BPF_SYSCALL
1095 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
1096 #endif
1097 };
1098 
1099 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1100 {
1101 	struct unix_sock *u;
1102 	struct sock *sk;
1103 	int err;
1104 
1105 	atomic_long_inc(&unix_nr_socks);
1106 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
1107 		err = -ENFILE;
1108 		goto err;
1109 	}
1110 
1111 	if (type == SOCK_STREAM)
1112 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
1113 	else /* dgram and seqpacket */
1114 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
1115 
1116 	if (!sk) {
1117 		err = -ENOMEM;
1118 		goto err;
1119 	}
1120 
1121 	sock_init_data(sock, sk);
1122 
1123 	sk->sk_scm_rights	= 1;
1124 	sk->sk_hash		= unix_unbound_hash(sk);
1125 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
1126 	sk->sk_write_space	= unix_write_space;
1127 	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
1128 	sk->sk_destruct		= unix_sock_destructor;
1129 	lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
1130 
1131 	u = unix_sk(sk);
1132 	u->listener = NULL;
1133 	u->vertex = NULL;
1134 	u->path.dentry = NULL;
1135 	u->path.mnt = NULL;
1136 	spin_lock_init(&u->lock);
1137 	lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
1138 	mutex_init(&u->iolock); /* single task reading lock */
1139 	mutex_init(&u->bindlock); /* single task binding lock */
1140 	init_waitqueue_head(&u->peer_wait);
1141 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1142 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1143 	unix_insert_unbound_socket(net, sk);
1144 
1145 	sock_prot_inuse_add(net, sk->sk_prot, 1);
1146 
1147 	return sk;
1148 
1149 err:
1150 	atomic_long_dec(&unix_nr_socks);
1151 	return ERR_PTR(err);
1152 }
1153 
1154 static int unix_create(struct net *net, struct socket *sock, int protocol,
1155 		       int kern)
1156 {
1157 	struct sock *sk;
1158 
1159 	if (protocol && protocol != PF_UNIX)
1160 		return -EPROTONOSUPPORT;
1161 
1162 	sock->state = SS_UNCONNECTED;
1163 
1164 	switch (sock->type) {
1165 	case SOCK_STREAM:
1166 		set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
1167 		sock->ops = &unix_stream_ops;
1168 		break;
1169 		/*
1170 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1171 		 *	nothing uses it.
1172 		 */
1173 	case SOCK_RAW:
1174 		sock->type = SOCK_DGRAM;
1175 		fallthrough;
1176 	case SOCK_DGRAM:
1177 		sock->ops = &unix_dgram_ops;
1178 		break;
1179 	case SOCK_SEQPACKET:
1180 		sock->ops = &unix_seqpacket_ops;
1181 		break;
1182 	default:
1183 		return -ESOCKTNOSUPPORT;
1184 	}
1185 
1186 	sk = unix_create1(net, sock, kern, sock->type);
1187 	if (IS_ERR(sk))
1188 		return PTR_ERR(sk);
1189 
1190 	return 0;
1191 }
1192 
1193 static int unix_release(struct socket *sock)
1194 {
1195 	struct sock *sk = sock->sk;
1196 
1197 	if (!sk)
1198 		return 0;
1199 
1200 	sk->sk_prot->close(sk, 0);
1201 	unix_release_sock(sk, 0);
1202 	sock->sk = NULL;
1203 
1204 	return 0;
1205 }
1206 
1207 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1208 				  int type, int flags)
1209 {
1210 	struct inode *inode;
1211 	struct path path;
1212 	struct sock *sk;
1213 	int err;
1214 
1215 	unix_mkname_bsd(sunaddr, addr_len);
1216 
1217 	if (flags & SOCK_COREDUMP) {
1218 		const struct cred *cred;
1219 		struct cred *kcred;
1220 		struct path root;
1221 
1222 		kcred = prepare_kernel_cred(&init_task);
1223 		if (!kcred) {
1224 			err = -ENOMEM;
1225 			goto fail;
1226 		}
1227 
1228 		task_lock(&init_task);
1229 		get_fs_root(init_task.fs, &root);
1230 		task_unlock(&init_task);
1231 
1232 		cred = override_creds(kcred);
1233 		err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
1234 				      LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
1235 				      LOOKUP_NO_MAGICLINKS, &path);
1236 		put_cred(revert_creds(cred));
1237 		path_put(&root);
1238 		if (err)
1239 			goto fail;
1240 	} else {
1241 		err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1242 		if (err)
1243 			goto fail;
1244 
1245 		err = path_permission(&path, MAY_WRITE);
1246 		if (err)
1247 			goto path_put;
1248 	}
1249 
1250 	err = -ECONNREFUSED;
1251 	inode = d_backing_inode(path.dentry);
1252 	if (!S_ISSOCK(inode->i_mode))
1253 		goto path_put;
1254 
1255 	sk = unix_find_socket_byinode(inode);
1256 	if (!sk)
1257 		goto path_put;
1258 
1259 	err = -EPROTOTYPE;
1260 	if (sk->sk_type == type)
1261 		touch_atime(&path);
1262 	else
1263 		goto sock_put;
1264 
1265 	path_put(&path);
1266 
1267 	return sk;
1268 
1269 sock_put:
1270 	sock_put(sk);
1271 path_put:
1272 	path_put(&path);
1273 fail:
1274 	return ERR_PTR(err);
1275 }
1276 
1277 static struct sock *unix_find_abstract(struct net *net,
1278 				       struct sockaddr_un *sunaddr,
1279 				       int addr_len, int type)
1280 {
1281 	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1282 	struct dentry *dentry;
1283 	struct sock *sk;
1284 
1285 	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1286 	if (!sk)
1287 		return ERR_PTR(-ECONNREFUSED);
1288 
1289 	dentry = unix_sk(sk)->path.dentry;
1290 	if (dentry)
1291 		touch_atime(&unix_sk(sk)->path);
1292 
1293 	return sk;
1294 }
1295 
1296 static struct sock *unix_find_other(struct net *net,
1297 				    struct sockaddr_un *sunaddr,
1298 				    int addr_len, int type, int flags)
1299 {
1300 	struct sock *sk;
1301 
1302 	if (sunaddr->sun_path[0])
1303 		sk = unix_find_bsd(sunaddr, addr_len, type, flags);
1304 	else
1305 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1306 
1307 	return sk;
1308 }
1309 
1310 static int unix_autobind(struct sock *sk)
1311 {
1312 	struct unix_sock *u = unix_sk(sk);
1313 	unsigned int new_hash, old_hash;
1314 	struct net *net = sock_net(sk);
1315 	struct unix_address *addr;
1316 	u32 lastnum, ordernum;
1317 	int err;
1318 
1319 	err = mutex_lock_interruptible(&u->bindlock);
1320 	if (err)
1321 		return err;
1322 
1323 	if (u->addr)
1324 		goto out;
1325 
1326 	err = -ENOMEM;
1327 	addr = kzalloc(sizeof(*addr) +
1328 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1329 	if (!addr)
1330 		goto out;
1331 
1332 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1333 	addr->name->sun_family = AF_UNIX;
1334 	refcount_set(&addr->refcnt, 1);
1335 
1336 	old_hash = sk->sk_hash;
1337 	ordernum = get_random_u32();
1338 	lastnum = ordernum & 0xFFFFF;
1339 retry:
1340 	ordernum = (ordernum + 1) & 0xFFFFF;
1341 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1342 
1343 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1344 	unix_table_double_lock(net, old_hash, new_hash);
1345 
1346 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1347 		unix_table_double_unlock(net, old_hash, new_hash);
1348 
1349 		/* __unix_find_socket_byname() may take long time if many names
1350 		 * are already in use.
1351 		 */
1352 		cond_resched();
1353 
1354 		if (ordernum == lastnum) {
1355 			/* Give up if all names seem to be in use. */
1356 			err = -ENOSPC;
1357 			unix_release_addr(addr);
1358 			goto out;
1359 		}
1360 
1361 		goto retry;
1362 	}
1363 
1364 	__unix_set_addr_hash(net, sk, addr, new_hash);
1365 	unix_table_double_unlock(net, old_hash, new_hash);
1366 	err = 0;
1367 
1368 out:	mutex_unlock(&u->bindlock);
1369 	return err;
1370 }
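
/* Illustrative userspace sketch (not part of this file): unix_autobind()
 * above is what runs when bind() is called with only the address family,
 * and it picks an abstract name of five hex digits.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	socklen_t alen = sizeof(a);
 *
 *	bind(fd, (struct sockaddr *)&a, offsetof(struct sockaddr_un, sun_path));
 *	getsockname(fd, (struct sockaddr *)&a, &alen);
 *	 (afterwards a.sun_path[0] == '\0' and sun_path[1..5] holds hex digits)
 */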
1371 
1372 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1373 			 int addr_len)
1374 {
1375 	umode_t mode = S_IFSOCK |
1376 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1377 	struct unix_sock *u = unix_sk(sk);
1378 	unsigned int new_hash, old_hash;
1379 	struct net *net = sock_net(sk);
1380 	struct mnt_idmap *idmap;
1381 	struct unix_address *addr;
1382 	struct dentry *dentry;
1383 	struct path parent;
1384 	int err;
1385 
1386 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1387 	addr = unix_create_addr(sunaddr, addr_len);
1388 	if (!addr)
1389 		return -ENOMEM;
1390 
1391 	/*
1392 	 * Get the parent directory, calculate the hash for last
1393 	 * component.
1394 	 */
1395 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1396 	if (IS_ERR(dentry)) {
1397 		err = PTR_ERR(dentry);
1398 		goto out;
1399 	}
1400 
1401 	/*
1402 	 * All right, let's create it.
1403 	 */
1404 	idmap = mnt_idmap(parent.mnt);
1405 	err = security_path_mknod(&parent, dentry, mode, 0);
1406 	if (!err)
1407 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1408 	if (err)
1409 		goto out_path;
1410 	err = mutex_lock_interruptible(&u->bindlock);
1411 	if (err)
1412 		goto out_unlink;
1413 	if (u->addr)
1414 		goto out_unlock;
1415 
1416 	old_hash = sk->sk_hash;
1417 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1418 	unix_table_double_lock(net, old_hash, new_hash);
1419 	u->path.mnt = mntget(parent.mnt);
1420 	u->path.dentry = dget(dentry);
1421 	__unix_set_addr_hash(net, sk, addr, new_hash);
1422 	unix_table_double_unlock(net, old_hash, new_hash);
1423 	unix_insert_bsd_socket(sk);
1424 	mutex_unlock(&u->bindlock);
1425 	done_path_create(&parent, dentry);
1426 	return 0;
1427 
1428 out_unlock:
1429 	mutex_unlock(&u->bindlock);
1430 	err = -EINVAL;
1431 out_unlink:
1432 	/* failed after successful mknod?  unlink what we'd created... */
1433 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1434 out_path:
1435 	done_path_create(&parent, dentry);
1436 out:
1437 	unix_release_addr(addr);
1438 	return err == -EEXIST ? -EADDRINUSE : err;
1439 }
1440 
1441 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1442 			      int addr_len)
1443 {
1444 	struct unix_sock *u = unix_sk(sk);
1445 	unsigned int new_hash, old_hash;
1446 	struct net *net = sock_net(sk);
1447 	struct unix_address *addr;
1448 	int err;
1449 
1450 	addr = unix_create_addr(sunaddr, addr_len);
1451 	if (!addr)
1452 		return -ENOMEM;
1453 
1454 	err = mutex_lock_interruptible(&u->bindlock);
1455 	if (err)
1456 		goto out;
1457 
1458 	if (u->addr) {
1459 		err = -EINVAL;
1460 		goto out_mutex;
1461 	}
1462 
1463 	old_hash = sk->sk_hash;
1464 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1465 	unix_table_double_lock(net, old_hash, new_hash);
1466 
1467 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1468 		goto out_spin;
1469 
1470 	__unix_set_addr_hash(net, sk, addr, new_hash);
1471 	unix_table_double_unlock(net, old_hash, new_hash);
1472 	mutex_unlock(&u->bindlock);
1473 	return 0;
1474 
1475 out_spin:
1476 	unix_table_double_unlock(net, old_hash, new_hash);
1477 	err = -EADDRINUSE;
1478 out_mutex:
1479 	mutex_unlock(&u->bindlock);
1480 out:
1481 	unix_release_addr(addr);
1482 	return err;
1483 }
1484 
1485 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1486 {
1487 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1488 	struct sock *sk = sock->sk;
1489 	int err;
1490 
1491 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1492 	    sunaddr->sun_family == AF_UNIX)
1493 		return unix_autobind(sk);
1494 
1495 	err = unix_validate_addr(sunaddr, addr_len);
1496 	if (err)
1497 		return err;
1498 
1499 	if (sunaddr->sun_path[0])
1500 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1501 	else
1502 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1503 
1504 	return err;
1505 }
1506 
1507 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1508 {
1509 	if (unlikely(sk1 == sk2) || !sk2) {
1510 		unix_state_lock(sk1);
1511 		return;
1512 	}
1513 
1514 	if (sk1 > sk2)
1515 		swap(sk1, sk2);
1516 
1517 	unix_state_lock(sk1);
1518 	unix_state_lock(sk2);
1519 }
1520 
1521 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1522 {
1523 	if (unlikely(sk1 == sk2) || !sk2) {
1524 		unix_state_unlock(sk1);
1525 		return;
1526 	}
1527 	unix_state_unlock(sk1);
1528 	unix_state_unlock(sk2);
1529 }
1530 
1531 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1532 			      int alen, int flags)
1533 {
1534 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1535 	struct sock *sk = sock->sk;
1536 	struct sock *other;
1537 	int err;
1538 
1539 	err = -EINVAL;
1540 	if (alen < offsetofend(struct sockaddr, sa_family))
1541 		goto out;
1542 
1543 	if (addr->sa_family != AF_UNSPEC) {
1544 		err = unix_validate_addr(sunaddr, alen);
1545 		if (err)
1546 			goto out;
1547 
1548 		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1549 		if (err)
1550 			goto out;
1551 
1552 		if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
1553 			err = unix_autobind(sk);
1554 			if (err)
1555 				goto out;
1556 		}
1557 
1558 restart:
1559 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
1560 		if (IS_ERR(other)) {
1561 			err = PTR_ERR(other);
1562 			goto out;
1563 		}
1564 
1565 		unix_state_double_lock(sk, other);
1566 
1567 		/* Apparently VFS overslept socket death. Retry. */
1568 		if (sock_flag(other, SOCK_DEAD)) {
1569 			unix_state_double_unlock(sk, other);
1570 			sock_put(other);
1571 			goto restart;
1572 		}
1573 
1574 		err = -EPERM;
1575 		if (!unix_may_send(sk, other))
1576 			goto out_unlock;
1577 
1578 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1579 		if (err)
1580 			goto out_unlock;
1581 
1582 		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1583 		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1584 	} else {
1585 		/*
1586 		 *	1003.1g breaking connected state with AF_UNSPEC
1587 		 */
1588 		other = NULL;
1589 		unix_state_double_lock(sk, other);
1590 	}
1591 
1592 	/*
1593 	 * If it was connected, reconnect.
1594 	 */
1595 	if (unix_peer(sk)) {
1596 		struct sock *old_peer = unix_peer(sk);
1597 
1598 		unix_peer(sk) = other;
1599 		if (!other)
1600 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1601 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1602 
1603 		unix_state_double_unlock(sk, other);
1604 
1605 		if (other != old_peer) {
1606 			unix_dgram_disconnected(sk, old_peer);
1607 
1608 			unix_state_lock(old_peer);
1609 			if (!unix_peer(old_peer))
1610 				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1611 			unix_state_unlock(old_peer);
1612 		}
1613 
1614 		sock_put(old_peer);
1615 	} else {
1616 		unix_peer(sk) = other;
1617 		unix_state_double_unlock(sk, other);
1618 	}
1619 
1620 	return 0;
1621 
1622 out_unlock:
1623 	unix_state_double_unlock(sk, other);
1624 	sock_put(other);
1625 out:
1626 	return err;
1627 }
1628 
1629 static long unix_wait_for_peer(struct sock *other, long timeo)
1630 {
1631 	struct unix_sock *u = unix_sk(other);
1632 	int sched;
1633 	DEFINE_WAIT(wait);
1634 
1635 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1636 
1637 	sched = !sock_flag(other, SOCK_DEAD) &&
1638 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1639 		unix_recvq_full_lockless(other);
1640 
1641 	unix_state_unlock(other);
1642 
1643 	if (sched)
1644 		timeo = schedule_timeout(timeo);
1645 
1646 	finish_wait(&u->peer_wait, &wait);
1647 	return timeo;
1648 }
1649 
1650 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1651 			       int addr_len, int flags)
1652 {
1653 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1654 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1655 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1656 	struct unix_peercred peercred = {};
1657 	struct net *net = sock_net(sk);
1658 	struct sk_buff *skb = NULL;
1659 	unsigned char state;
1660 	long timeo;
1661 	int err;
1662 
1663 	err = unix_validate_addr(sunaddr, addr_len);
1664 	if (err)
1665 		goto out;
1666 
1667 	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1668 	if (err)
1669 		goto out;
1670 
1671 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
1672 		err = unix_autobind(sk);
1673 		if (err)
1674 			goto out;
1675 	}
1676 
1677 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1678 
1679 	/* First of all allocate resources.
1680 	 * If we will make it after state is locked,
1681 	 * we will have to recheck all again in any case.
1682 	 */
1683 
1684 	/* create new sock for complete connection */
1685 	newsk = unix_create1(net, NULL, 0, sock->type);
1686 	if (IS_ERR(newsk)) {
1687 		err = PTR_ERR(newsk);
1688 		goto out;
1689 	}
1690 
1691 	err = prepare_peercred(&peercred);
1692 	if (err)
1693 		goto out;
1694 
1695 	/* Allocate skb for sending to listening sock */
1696 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1697 	if (!skb) {
1698 		err = -ENOMEM;
1699 		goto out_free_sk;
1700 	}
1701 
1702 restart:
1703 	/*  Find listening sock. */
1704 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
1705 	if (IS_ERR(other)) {
1706 		err = PTR_ERR(other);
1707 		goto out_free_skb;
1708 	}
1709 
1710 	unix_state_lock(other);
1711 
1712 	/* Apparently VFS overslept socket death. Retry. */
1713 	if (sock_flag(other, SOCK_DEAD)) {
1714 		unix_state_unlock(other);
1715 		sock_put(other);
1716 		goto restart;
1717 	}
1718 
1719 	if (other->sk_state != TCP_LISTEN ||
1720 	    other->sk_shutdown & RCV_SHUTDOWN) {
1721 		err = -ECONNREFUSED;
1722 		goto out_unlock;
1723 	}
1724 
1725 	if (unix_recvq_full_lockless(other)) {
1726 		if (!timeo) {
1727 			err = -EAGAIN;
1728 			goto out_unlock;
1729 		}
1730 
1731 		timeo = unix_wait_for_peer(other, timeo);
1732 		sock_put(other);
1733 
1734 		err = sock_intr_errno(timeo);
1735 		if (signal_pending(current))
1736 			goto out_free_skb;
1737 
1738 		goto restart;
1739 	}
1740 
1741 	/* self connect and simultaneous connect are eliminated
1742 	 * by rejecting TCP_LISTEN socket to avoid deadlock.
1743 	 */
1744 	state = READ_ONCE(sk->sk_state);
1745 	if (unlikely(state != TCP_CLOSE)) {
1746 		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1747 		goto out_unlock;
1748 	}
1749 
1750 	unix_state_lock(sk);
1751 
1752 	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1753 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1754 		unix_state_unlock(sk);
1755 		goto out_unlock;
1756 	}
1757 
1758 	err = security_unix_stream_connect(sk, other, newsk);
1759 	if (err) {
1760 		unix_state_unlock(sk);
1761 		goto out_unlock;
1762 	}
1763 
1764 	/* The way is open! Fastly set all the necessary fields... */
1765 
1766 	sock_hold(sk);
1767 	unix_peer(newsk) = sk;
1768 	newsk->sk_state = TCP_ESTABLISHED;
1769 	newsk->sk_type = sk->sk_type;
1770 	newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
1771 	init_peercred(newsk, &peercred);
1772 
1773 	newu = unix_sk(newsk);
1774 	newu->listener = other;
1775 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1776 	otheru = unix_sk(other);
1777 
1778 	/* copy address information from listening to new sock
1779 	 *
1780 	 * The contents of *(otheru->addr) and otheru->path
1781 	 * are seen fully set up here, since we have found
1782 	 * otheru in hash under its lock.  Insertion into the
1783 	 * hash chain we'd found it in had been done in an
1784 	 * earlier critical area protected by the chain's lock,
1785 	 * the same one where we'd set *(otheru->addr) contents,
1786 	 * as well as otheru->path and otheru->addr itself.
1787 	 *
1788 	 * Using smp_store_release() here to set newu->addr
1789 	 * is enough to make those stores, as well as stores
1790 	 * to newu->path visible to anyone who gets newu->addr
1791 	 * by smp_load_acquire().  IOW, the same warranties
1792 	 * as for unix_sock instances bound in unix_bind() or
1793 	 * in unix_autobind().
1794 	 */
1795 	if (otheru->path.dentry) {
1796 		path_get(&otheru->path);
1797 		newu->path = otheru->path;
1798 	}
1799 	refcount_inc(&otheru->addr->refcnt);
1800 	smp_store_release(&newu->addr, otheru->addr);
1801 
1802 	/* Set credentials */
1803 	copy_peercred(sk, other);
1804 
1805 	sock->state	= SS_CONNECTED;
1806 	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1807 	sock_hold(newsk);
1808 
1809 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1810 	unix_peer(sk)	= newsk;
1811 
1812 	unix_state_unlock(sk);
1813 
1814 	/* take ten and send info to listening sock */
1815 	spin_lock(&other->sk_receive_queue.lock);
1816 	__skb_queue_tail(&other->sk_receive_queue, skb);
1817 	spin_unlock(&other->sk_receive_queue.lock);
1818 	unix_state_unlock(other);
1819 	other->sk_data_ready(other);
1820 	sock_put(other);
1821 	return 0;
1822 
1823 out_unlock:
1824 	unix_state_unlock(other);
1825 	sock_put(other);
1826 out_free_skb:
1827 	consume_skb(skb);
1828 out_free_sk:
1829 	unix_release_sock(newsk, 0);
1830 out:
1831 	drop_peercred(&peercred);
1832 	return err;
1833 }
1834 
1835 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1836 {
1837 	struct unix_peercred ska_peercred = {}, skb_peercred = {};
1838 	struct sock *ska = socka->sk, *skb = sockb->sk;
1839 	int err;
1840 
1841 	err = prepare_peercred(&ska_peercred);
1842 	if (err)
1843 		return err;
1844 
1845 	err = prepare_peercred(&skb_peercred);
1846 	if (err) {
1847 		drop_peercred(&ska_peercred);
1848 		return err;
1849 	}
1850 
1851 	/* Join our sockets back to back */
1852 	sock_hold(ska);
1853 	sock_hold(skb);
1854 	unix_peer(ska) = skb;
1855 	unix_peer(skb) = ska;
1856 	init_peercred(ska, &ska_peercred);
1857 	init_peercred(skb, &skb_peercred);
1858 
1859 	ska->sk_state = TCP_ESTABLISHED;
1860 	skb->sk_state = TCP_ESTABLISHED;
1861 	socka->state  = SS_CONNECTED;
1862 	sockb->state  = SS_CONNECTED;
1863 	return 0;
1864 }
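
/* Illustrative userspace sketch (not part of this file): the credentials
 * recorded by init_peercred() in unix_socketpair() above are what
 * SO_PEERCRED later reports on either end of the pair.
 *
 *	int sv[2];
 *	struct ucred uc;
 *	socklen_t len = sizeof(uc);
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	getsockopt(sv[0], SOL_SOCKET, SO_PEERCRED, &uc, &len);
 *	 (uc.pid/uc.uid/uc.gid describe the process that called socketpair())
 */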
1865 
1866 static int unix_accept(struct socket *sock, struct socket *newsock,
1867 		       struct proto_accept_arg *arg)
1868 {
1869 	struct sock *sk = sock->sk;
1870 	struct sk_buff *skb;
1871 	struct sock *tsk;
1872 
1873 	arg->err = -EOPNOTSUPP;
1874 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1875 		goto out;
1876 
1877 	arg->err = -EINVAL;
1878 	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1879 		goto out;
1880 
1881 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1882 	 * so that no locks are necessary.
1883 	 */
1884 
1885 	skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1886 				&arg->err);
1887 	if (!skb) {
1888 		/* This means receive shutdown. */
1889 		if (arg->err == 0)
1890 			arg->err = -EINVAL;
1891 		goto out;
1892 	}
1893 
1894 	tsk = skb->sk;
1895 	skb_free_datagram(sk, skb);
1896 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1897 
1898 	if (tsk->sk_type == SOCK_STREAM)
1899 		set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
1900 
1901 	/* attach accepted sock to socket */
1902 	unix_state_lock(tsk);
1903 	unix_update_edges(unix_sk(tsk));
1904 	newsock->state = SS_CONNECTED;
1905 	sock_graft(tsk, newsock);
1906 	unix_state_unlock(tsk);
1907 	return 0;
1908 
1909 out:
1910 	return arg->err;
1911 }
1912 
1914 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1915 {
1916 	struct sock *sk = sock->sk;
1917 	struct unix_address *addr;
1918 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1919 	int err = 0;
1920 
1921 	if (peer) {
1922 		sk = unix_peer_get(sk);
1923 
1924 		err = -ENOTCONN;
1925 		if (!sk)
1926 			goto out;
1927 		err = 0;
1928 	} else {
1929 		sock_hold(sk);
1930 	}
1931 
1932 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1933 	if (!addr) {
1934 		sunaddr->sun_family = AF_UNIX;
1935 		sunaddr->sun_path[0] = 0;
1936 		err = offsetof(struct sockaddr_un, sun_path);
1937 	} else {
1938 		err = addr->len;
1939 		memcpy(sunaddr, addr->name, addr->len);
1940 
1941 		if (peer)
1942 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1943 					       CGROUP_UNIX_GETPEERNAME);
1944 		else
1945 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1946 					       CGROUP_UNIX_GETSOCKNAME);
1947 	}
1948 	sock_put(sk);
1949 out:
1950 	return err;
1951 }
1952 
1953 /* The "user->unix_inflight" variable is protected by the garbage
1954  * collection lock, and we just read it locklessly here. If you go
1955  * over the limit, there might be a tiny race in actually noticing
1956  * it across threads. Tough.
1957  */
1958 static inline bool too_many_unix_fds(struct task_struct *p)
1959 {
1960 	struct user_struct *user = current_user();
1961 
1962 	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1963 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1964 	return false;
1965 }
1966 
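/* Transfer ownership of the SCM_RIGHTS file list from @scm to the skb
 * control block so the fd references travel with the message;
 * unix_prepare_fpl() sets the list up for the fd-passing garbage collector.
 * -ETOOMANYREFS enforces the per-user in-flight fd limit checked above.
 */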
1967 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1968 {
1969 	if (too_many_unix_fds(current))
1970 		return -ETOOMANYREFS;
1971 
1972 	UNIXCB(skb).fp = scm->fp;
1973 	scm->fp = NULL;
1974 
1975 	if (unix_prepare_fpl(UNIXCB(skb).fp))
1976 		return -ENOMEM;
1977 
1978 	return 0;
1979 }
1980 
1981 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1982 {
1983 	scm->fp = UNIXCB(skb).fp;
1984 	UNIXCB(skb).fp = NULL;
1985 
1986 	unix_destroy_fpl(scm->fp);
1987 }
1988 
1989 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1990 {
1991 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1992 }
1993 
1994 static void unix_destruct_scm(struct sk_buff *skb)
1995 {
1996 	struct scm_cookie scm;
1997 
1998 	memset(&scm, 0, sizeof(scm));
1999 	scm.pid  = UNIXCB(skb).pid;
2000 	if (UNIXCB(skb).fp)
2001 		unix_detach_fds(&scm, skb);
2002 
2003 	/* Alas, it calls VFS */
2004 	/* Alas, this calls into the VFS. */
2005 	/* So what?  fput() has long been SMP-safe. */
2006 	sock_wfree(skb);
2007 }
2008 
2009 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
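/* Stash the sender's pid, uid/gid, security data and, when @send_fds is
 * true, any passed file descriptors from @scm in the skb control block,
 * and install the destructor that releases them once the skb is freed.
 */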
2010 {
2011 	int err = 0;
2012 
2013 	UNIXCB(skb).pid = get_pid(scm->pid);
2014 	UNIXCB(skb).uid = scm->creds.uid;
2015 	UNIXCB(skb).gid = scm->creds.gid;
2016 	UNIXCB(skb).fp = NULL;
2017 	unix_get_secdata(scm, skb);
2018 	if (scm->fp && send_fds)
2019 		err = unix_attach_fds(scm, skb);
2020 
2021 	skb->destructor = unix_destruct_scm;
2022 	return err;
2023 }
2024 
2025 /*
2026  * Some apps rely on write() giving SCM_CREDENTIALS.
2027  * We include credentials if the source or destination socket
2028  * asserted SOCK_PASSCRED.
2029  */
2030 static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
2031 				 const struct sock *other)
2032 {
2033 	if (UNIXCB(skb).pid)
2034 		return;
2035 
2036 	if (unix_may_passcred(sk) || unix_may_passcred(other) ||
2037 	    !other->sk_socket) {
2038 		UNIXCB(skb).pid = get_pid(task_tgid(current));
2039 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
2040 	}
2041 }
2042 
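/* Return true when the credentials and security label attached to @skb
 * match those accumulated in @scm.  The stream receive path uses this to
 * avoid merging data from different writers into a single recvmsg() call.
 */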
2043 static bool unix_skb_scm_eq(struct sk_buff *skb,
2044 			    struct scm_cookie *scm)
2045 {
2046 	return UNIXCB(skb).pid == scm->pid &&
2047 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
2048 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
2049 	       unix_secdata_eq(scm, skb);
2050 }
2051 
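/* scm_stat_add()/scm_stat_del() track how many SCM_RIGHTS file descriptors
 * are queued on a receiving socket and keep the garbage-collector graph
 * edges for in-flight fds up to date.
 */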
2052 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
2053 {
2054 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2055 	struct unix_sock *u = unix_sk(sk);
2056 
2057 	if (unlikely(fp && fp->count)) {
2058 		atomic_add(fp->count, &u->scm_stat.nr_fds);
2059 		unix_add_edges(fp, u);
2060 	}
2061 }
2062 
2063 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
2064 {
2065 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2066 	struct unix_sock *u = unix_sk(sk);
2067 
2068 	if (unlikely(fp && fp->count)) {
2069 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
2070 		unix_del_edges(fp);
2071 	}
2072 }
2073 
2074 /*
2075  *	Send AF_UNIX data.
2076  */
2077 
2078 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
2079 			      size_t len)
2080 {
2081 	struct sock *sk = sock->sk, *other = NULL;
2082 	struct unix_sock *u = unix_sk(sk);
2083 	struct scm_cookie scm;
2084 	struct sk_buff *skb;
2085 	int data_len = 0;
2086 	int sk_locked;
2087 	long timeo;
2088 	int err;
2089 
2090 	err = scm_send(sock, msg, &scm, false);
2091 	if (err < 0)
2092 		return err;
2093 
2094 	wait_for_unix_gc(scm.fp);
2095 
2096 	if (msg->msg_flags & MSG_OOB) {
2097 		err = -EOPNOTSUPP;
2098 		goto out;
2099 	}
2100 
2101 	if (msg->msg_namelen) {
2102 		err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
2103 		if (err)
2104 			goto out;
2105 
2106 		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
2107 							    msg->msg_name,
2108 							    &msg->msg_namelen,
2109 							    NULL);
2110 		if (err)
2111 			goto out;
2112 	}
2113 
2114 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
2115 		err = unix_autobind(sk);
2116 		if (err)
2117 			goto out;
2118 	}
2119 
2120 	if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
2121 		err = -EMSGSIZE;
2122 		goto out;
2123 	}
2124 
2125 	if (len > SKB_MAX_ALLOC) {
2126 		data_len = min_t(size_t,
2127 				 len - SKB_MAX_ALLOC,
2128 				 MAX_SKB_FRAGS * PAGE_SIZE);
2129 		data_len = PAGE_ALIGN(data_len);
2130 
2131 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
2132 	}
2133 
2134 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
2135 				   msg->msg_flags & MSG_DONTWAIT, &err,
2136 				   PAGE_ALLOC_COSTLY_ORDER);
2137 	if (!skb)
2138 		goto out;
2139 
2140 	err = unix_scm_to_skb(&scm, skb, true);
2141 	if (err < 0)
2142 		goto out_free;
2143 
2144 	skb_put(skb, len - data_len);
2145 	skb->data_len = data_len;
2146 	skb->len = len;
2147 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
2148 	if (err)
2149 		goto out_free;
2150 
2151 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2152 
2153 	if (msg->msg_namelen) {
2154 lookup:
2155 		other = unix_find_other(sock_net(sk), msg->msg_name,
2156 					msg->msg_namelen, sk->sk_type, 0);
2157 		if (IS_ERR(other)) {
2158 			err = PTR_ERR(other);
2159 			goto out_free;
2160 		}
2161 	} else {
2162 		other = unix_peer_get(sk);
2163 		if (!other) {
2164 			err = -ENOTCONN;
2165 			goto out_free;
2166 		}
2167 	}
2168 
2169 	if (sk_filter(other, skb) < 0) {
2170 		/* Toss the packet but do not return any error to the sender */
2171 		err = len;
2172 		goto out_sock_put;
2173 	}
2174 
2175 restart:
2176 	sk_locked = 0;
2177 	unix_state_lock(other);
2178 restart_locked:
2179 
2180 	if (!unix_may_send(sk, other)) {
2181 		err = -EPERM;
2182 		goto out_unlock;
2183 	}
2184 
2185 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
2186 		/* Check with POSIX 1003.1g - what should a datagram error do here? */
2187 
2188 		unix_state_unlock(other);
2189 
2190 		if (sk->sk_type == SOCK_SEQPACKET) {
2191 		/* We get here only when racing with unix_release_sock(),
2192 		 * which is clearing @other.  Do not change the state to
2193 		 * TCP_CLOSE here, unlike the SOCK_DGRAM case.
2194 			 */
2195 			err = -EPIPE;
2196 			goto out_sock_put;
2197 		}
2198 
2199 		if (!sk_locked)
2200 			unix_state_lock(sk);
2201 
2202 		if (unix_peer(sk) == other) {
2203 			unix_peer(sk) = NULL;
2204 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2205 
2206 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2207 			unix_state_unlock(sk);
2208 
2209 			unix_dgram_disconnected(sk, other);
2210 			sock_put(other);
2211 			err = -ECONNREFUSED;
2212 			goto out_sock_put;
2213 		}
2214 
2215 		unix_state_unlock(sk);
2216 
2217 		if (!msg->msg_namelen) {
2218 			err = -ECONNRESET;
2219 			goto out_sock_put;
2220 		}
2221 
2222 		sock_put(other);
2223 		goto lookup;
2224 	}
2225 
2226 	if (other->sk_shutdown & RCV_SHUTDOWN) {
2227 		err = -EPIPE;
2228 		goto out_unlock;
2229 	}
2230 
2231 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2232 		err = -EPERM;
2233 		goto out_unlock;
2234 	}
2235 
2236 	if (sk->sk_type != SOCK_SEQPACKET) {
2237 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2238 		if (err)
2239 			goto out_unlock;
2240 	}
2241 
2242 	/* other == sk && unix_peer(other) != sk can happen if
2243 	 * - unix_peer(sk) == NULL and the destination address is bound to sk, or
2244 	 * - unix_peer(sk) == sk at the time of unix_peer_get(), but it was disconnected before we took the lock
2245 	 */
2246 	if (other != sk &&
2247 	    unlikely(unix_peer(other) != sk &&
2248 	    unix_recvq_full_lockless(other))) {
2249 		if (timeo) {
2250 			timeo = unix_wait_for_peer(other, timeo);
2251 
2252 			err = sock_intr_errno(timeo);
2253 			if (signal_pending(current))
2254 				goto out_sock_put;
2255 
2256 			goto restart;
2257 		}
2258 
2259 		if (!sk_locked) {
2260 			unix_state_unlock(other);
2261 			unix_state_double_lock(sk, other);
2262 		}
2263 
2264 		if (unix_peer(sk) != other ||
2265 		    unix_dgram_peer_wake_me(sk, other)) {
2266 			err = -EAGAIN;
2267 			sk_locked = 1;
2268 			goto out_unlock;
2269 		}
2270 
2271 		if (!sk_locked) {
2272 			sk_locked = 1;
2273 			goto restart_locked;
2274 		}
2275 	}
2276 
2277 	if (unlikely(sk_locked))
2278 		unix_state_unlock(sk);
2279 
2280 	if (sock_flag(other, SOCK_RCVTSTAMP))
2281 		__net_timestamp(skb);
2282 
2283 	unix_maybe_add_creds(skb, sk, other);
2284 	scm_stat_add(other, skb);
2285 	skb_queue_tail(&other->sk_receive_queue, skb);
2286 	unix_state_unlock(other);
2287 	other->sk_data_ready(other);
2288 	sock_put(other);
2289 	scm_destroy(&scm);
2290 	return len;
2291 
2292 out_unlock:
2293 	if (sk_locked)
2294 		unix_state_unlock(sk);
2295 	unix_state_unlock(other);
2296 out_sock_put:
2297 	sock_put(other);
2298 out_free:
2299 	consume_skb(skb);
2300 out:
2301 	scm_destroy(&scm);
2302 	return err;
2303 }
2304 
2305 /* We use paged skbs for stream sockets and limit occupancy to 32768
2306  * bytes, with a minimum of a full page.
2307  */
2308 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2309 
2310 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
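/* Queue a single out-of-band byte to @other: a 1-byte skb is appended to
 * the receive queue, recorded as the OOB mark in ousk->oob_skb, and the
 * receiver is notified with SIGURG via sk_send_sigurg().
 */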
2311 static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
2312 		     struct scm_cookie *scm, bool fds_sent)
2313 {
2314 	struct unix_sock *ousk = unix_sk(other);
2315 	struct sk_buff *skb;
2316 	int err;
2317 
2318 	skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2319 
2320 	if (!skb)
2321 		return err;
2322 
2323 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2324 	if (err < 0)
2325 		goto out;
2326 
2327 	skb_put(skb, 1);
2328 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2329 
2330 	if (err)
2331 		goto out;
2332 
2333 	unix_state_lock(other);
2334 
2335 	if (sock_flag(other, SOCK_DEAD) ||
2336 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2337 		err = -EPIPE;
2338 		goto out_unlock;
2339 	}
2340 
2341 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2342 		err = -EPERM;
2343 		goto out_unlock;
2344 	}
2345 
2346 	unix_maybe_add_creds(skb, sk, other);
2347 	scm_stat_add(other, skb);
2348 
2349 	spin_lock(&other->sk_receive_queue.lock);
2350 	WRITE_ONCE(ousk->oob_skb, skb);
2351 	WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
2352 	__skb_queue_tail(&other->sk_receive_queue, skb);
2353 	spin_unlock(&other->sk_receive_queue.lock);
2354 
2355 	sk_send_sigurg(other);
2356 	unix_state_unlock(other);
2357 	other->sk_data_ready(other);
2358 
2359 	return 0;
2360 out_unlock:
2361 	unix_state_unlock(other);
2362 out:
2363 	consume_skb(skb);
2364 	return err;
2365 }
2366 #endif
2367 
2368 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2369 			       size_t len)
2370 {
2371 	struct sock *sk = sock->sk;
2372 	struct sk_buff *skb = NULL;
2373 	struct sock *other = NULL;
2374 	struct unix_sock *otheru;
2375 	struct scm_cookie scm;
2376 	bool fds_sent = false;
2377 	int err, sent = 0;
2378 
2379 	err = scm_send(sock, msg, &scm, false);
2380 	if (err < 0)
2381 		return err;
2382 
2383 	wait_for_unix_gc(scm.fp);
2384 
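	/* With MSG_OOB, the final byte of the buffer is reserved here and
	 * sent separately as the out-of-band byte via queue_oob() once the
	 * in-band data below has been queued.
	 */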
2385 	if (msg->msg_flags & MSG_OOB) {
2386 		err = -EOPNOTSUPP;
2387 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2388 		if (len)
2389 			len--;
2390 		else
2391 #endif
2392 			goto out_err;
2393 	}
2394 
2395 	if (msg->msg_namelen) {
2396 		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2397 		goto out_err;
2398 	}
2399 
2400 	other = unix_peer(sk);
2401 	if (!other) {
2402 		err = -ENOTCONN;
2403 		goto out_err;
2404 	}
2405 
2406 	otheru = unix_sk(other);
2407 
2408 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2409 		goto out_pipe;
2410 
2411 	while (sent < len) {
2412 		int size = len - sent;
2413 		int data_len;
2414 
2415 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2416 			skb = sock_alloc_send_pskb(sk, 0, 0,
2417 						   msg->msg_flags & MSG_DONTWAIT,
2418 						   &err, 0);
2419 		} else {
2420 			/* Keep two messages in the pipe so it schedules better */
2421 			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2422 
2423 			/* allow fallback to order-0 allocations */
2424 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2425 
2426 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2427 
2428 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2429 
2430 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2431 						   msg->msg_flags & MSG_DONTWAIT, &err,
2432 						   get_order(UNIX_SKB_FRAGS_SZ));
2433 		}
2434 		if (!skb)
2435 			goto out_err;
2436 
2437 		/* Only send the fds in the first buffer */
2438 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2439 		if (err < 0)
2440 			goto out_free;
2441 
2442 		fds_sent = true;
2443 
2444 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2445 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2446 			err = skb_splice_from_iter(skb, &msg->msg_iter, size);
2447 			if (err < 0)
2448 				goto out_free;
2449 
2450 			size = err;
2451 			refcount_add(size, &sk->sk_wmem_alloc);
2452 		} else {
2453 			skb_put(skb, size - data_len);
2454 			skb->data_len = data_len;
2455 			skb->len = size;
2456 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2457 			if (err)
2458 				goto out_free;
2459 		}
2460 
2461 		unix_state_lock(other);
2462 
2463 		if (sock_flag(other, SOCK_DEAD) ||
2464 		    (other->sk_shutdown & RCV_SHUTDOWN))
2465 			goto out_pipe_unlock;
2466 
2467 		if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2468 			unix_state_unlock(other);
2469 			err = -EPERM;
2470 			goto out_free;
2471 		}
2472 
2473 		unix_maybe_add_creds(skb, sk, other);
2474 		scm_stat_add(other, skb);
2475 
2476 		spin_lock(&other->sk_receive_queue.lock);
2477 		WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
2478 		__skb_queue_tail(&other->sk_receive_queue, skb);
2479 		spin_unlock(&other->sk_receive_queue.lock);
2480 
2481 		unix_state_unlock(other);
2482 		other->sk_data_ready(other);
2483 		sent += size;
2484 	}
2485 
2486 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2487 	if (msg->msg_flags & MSG_OOB) {
2488 		err = queue_oob(sk, msg, other, &scm, fds_sent);
2489 		if (err)
2490 			goto out_err;
2491 		sent++;
2492 	}
2493 #endif
2494 
2495 	scm_destroy(&scm);
2496 
2497 	return sent;
2498 
2499 out_pipe_unlock:
2500 	unix_state_unlock(other);
2501 out_pipe:
2502 	if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
2503 		send_sig(SIGPIPE, current, 0);
2504 	err = -EPIPE;
2505 out_free:
2506 	consume_skb(skb);
2507 out_err:
2508 	scm_destroy(&scm);
2509 	return sent ? : err;
2510 }
2511 
2512 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2513 				  size_t len)
2514 {
2515 	int err;
2516 	struct sock *sk = sock->sk;
2517 
2518 	err = sock_error(sk);
2519 	if (err)
2520 		return err;
2521 
2522 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2523 		return -ENOTCONN;
2524 
2525 	if (msg->msg_namelen)
2526 		msg->msg_namelen = 0;
2527 
2528 	return unix_dgram_sendmsg(sock, msg, len);
2529 }
2530 
2531 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2532 				  size_t size, int flags)
2533 {
2534 	struct sock *sk = sock->sk;
2535 
2536 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2537 		return -ENOTCONN;
2538 
2539 	return unix_dgram_recvmsg(sock, msg, size, flags);
2540 }
2541 
2542 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2543 {
2544 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2545 
2546 	if (addr) {
2547 		msg->msg_namelen = addr->len;
2548 		memcpy(msg->msg_name, addr->name, addr->len);
2549 	}
2550 }
2551 
2552 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2553 			 int flags)
2554 {
2555 	struct scm_cookie scm;
2556 	struct socket *sock = sk->sk_socket;
2557 	struct unix_sock *u = unix_sk(sk);
2558 	struct sk_buff *skb, *last;
2559 	long timeo;
2560 	int skip;
2561 	int err;
2562 
2563 	err = -EOPNOTSUPP;
2564 	if (flags&MSG_OOB)
2565 		goto out;
2566 
2567 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2568 
2569 	do {
2570 		mutex_lock(&u->iolock);
2571 
2572 		skip = sk_peek_offset(sk, flags);
2573 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2574 					      &skip, &err, &last);
2575 		if (skb) {
2576 			if (!(flags & MSG_PEEK))
2577 				scm_stat_del(sk, skb);
2578 			break;
2579 		}
2580 
2581 		mutex_unlock(&u->iolock);
2582 
2583 		if (err != -EAGAIN)
2584 			break;
2585 	} while (timeo &&
2586 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2587 					      &err, &timeo, last));
2588 
2589 	if (!skb) { /* implies iolock unlocked */
2590 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2591 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2592 		    (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
2593 			err = 0;
2594 		goto out;
2595 	}
2596 
2597 	if (wq_has_sleeper(&u->peer_wait))
2598 		wake_up_interruptible_sync_poll(&u->peer_wait,
2599 						EPOLLOUT | EPOLLWRNORM |
2600 						EPOLLWRBAND);
2601 
2602 	if (msg->msg_name) {
2603 		unix_copy_addr(msg, skb->sk);
2604 
2605 		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2606 						      msg->msg_name,
2607 						      &msg->msg_namelen);
2608 	}
2609 
2610 	if (size > skb->len - skip)
2611 		size = skb->len - skip;
2612 	else if (size < skb->len - skip)
2613 		msg->msg_flags |= MSG_TRUNC;
2614 
2615 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2616 	if (err)
2617 		goto out_free;
2618 
2619 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2620 		__sock_recv_timestamp(msg, sk, skb);
2621 
2622 	memset(&scm, 0, sizeof(scm));
2623 
2624 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2625 	unix_set_secdata(&scm, skb);
2626 
2627 	if (!(flags & MSG_PEEK)) {
2628 		if (UNIXCB(skb).fp)
2629 			unix_detach_fds(&scm, skb);
2630 
2631 		sk_peek_offset_bwd(sk, skb->len);
2632 	} else {
2633 		/* It is questionable what to do on MSG_PEEK.  We could:
2634 		   - not return fds (simple, but arguably too simple),
2635 		   - return fds, and not return them again on the actual
2636 		     read (the old strategy, apparently wrong), or
2637 		   - clone the fds (chosen here for now, as the most
2638 		     universal solution).
2639 
2640 		   POSIX 1003.1g does not actually define this clearly
2641 		   at all - but then POSIX 1003.1g doesn't define a lot
2642 		   of things clearly!
2643 
2644 		*/
2645 
2646 		sk_peek_offset_fwd(sk, size);
2647 
2648 		if (UNIXCB(skb).fp)
2649 			unix_peek_fds(&scm, skb);
2650 	}
2651 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2652 
2653 	scm_recv_unix(sock, msg, &scm, flags);
2654 
2655 out_free:
2656 	skb_free_datagram(sk, skb);
2657 	mutex_unlock(&u->iolock);
2658 out:
2659 	return err;
2660 }
2661 
2662 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2663 			      int flags)
2664 {
2665 	struct sock *sk = sock->sk;
2666 
2667 #ifdef CONFIG_BPF_SYSCALL
2668 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2669 
2670 	if (prot != &unix_dgram_proto)
2671 		return prot->recvmsg(sk, msg, size, flags, NULL);
2672 #endif
2673 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2674 }
2675 
2676 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2677 {
2678 	struct unix_sock *u = unix_sk(sk);
2679 	struct sk_buff *skb;
2680 	int err;
2681 
2682 	mutex_lock(&u->iolock);
2683 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2684 	mutex_unlock(&u->iolock);
2685 	if (!skb)
2686 		return err;
2687 
2688 	return recv_actor(sk, skb);
2689 }
2690 
2691 /*
2692  *	Sleep until more data has arrived, but check for races.
2693  */
2694 static long unix_stream_data_wait(struct sock *sk, long timeo,
2695 				  struct sk_buff *last, unsigned int last_len,
2696 				  bool freezable)
2697 {
2698 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2699 	struct sk_buff *tail;
2700 	DEFINE_WAIT(wait);
2701 
2702 	unix_state_lock(sk);
2703 
2704 	for (;;) {
2705 		prepare_to_wait(sk_sleep(sk), &wait, state);
2706 
2707 		tail = skb_peek_tail(&sk->sk_receive_queue);
2708 		if (tail != last ||
2709 		    (tail && tail->len != last_len) ||
2710 		    sk->sk_err ||
2711 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2712 		    signal_pending(current) ||
2713 		    !timeo)
2714 			break;
2715 
2716 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2717 		unix_state_unlock(sk);
2718 		timeo = schedule_timeout(timeo);
2719 		unix_state_lock(sk);
2720 
2721 		if (sock_flag(sk, SOCK_DEAD))
2722 			break;
2723 
2724 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2725 	}
2726 
2727 	finish_wait(sk_sleep(sk), &wait);
2728 	unix_state_unlock(sk);
2729 	return timeo;
2730 }
2731 
2732 struct unix_stream_read_state {
2733 	int (*recv_actor)(struct sk_buff *, int, int,
2734 			  struct unix_stream_read_state *);
2735 	struct socket *socket;
2736 	struct msghdr *msg;
2737 	struct pipe_inode_info *pipe;
2738 	size_t size;
2739 	int flags;
2740 	unsigned int splice_flags;
2741 };
2742 
2743 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2744 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2745 {
2746 	struct sk_buff *oob_skb, *read_skb = NULL;
2747 	struct socket *sock = state->socket;
2748 	struct sock *sk = sock->sk;
2749 	struct unix_sock *u = unix_sk(sk);
2750 	int chunk = 1;
2751 
2752 	mutex_lock(&u->iolock);
2753 	unix_state_lock(sk);
2754 	spin_lock(&sk->sk_receive_queue.lock);
2755 
2756 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2757 		spin_unlock(&sk->sk_receive_queue.lock);
2758 		unix_state_unlock(sk);
2759 		mutex_unlock(&u->iolock);
2760 		return -EINVAL;
2761 	}
2762 
2763 	oob_skb = u->oob_skb;
2764 
2765 	if (!(state->flags & MSG_PEEK)) {
2766 		WRITE_ONCE(u->oob_skb, NULL);
2767 		WRITE_ONCE(u->inq_len, u->inq_len - 1);
2768 
2769 		if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
2770 		    !unix_skb_len(oob_skb->prev)) {
2771 			read_skb = oob_skb->prev;
2772 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2773 		}
2774 	}
2775 
2776 	spin_unlock(&sk->sk_receive_queue.lock);
2777 	unix_state_unlock(sk);
2778 
2779 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2780 
2781 	if (!(state->flags & MSG_PEEK))
2782 		UNIXCB(oob_skb).consumed += 1;
2783 
2784 	mutex_unlock(&u->iolock);
2785 
2786 	consume_skb(read_skb);
2787 
2788 	if (chunk < 0)
2789 		return -EFAULT;
2790 
2791 	state->msg->msg_flags |= MSG_OOB;
2792 	return 1;
2793 }
2794 
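/* Called from the stream receive path with unix_state_lock(sk) held.
 * Decide which skb, if any, may be consumed next relative to the OOB mark
 * (u->oob_skb): fully consumed empty skbs are unlinked, reading stops at
 * the mark once some data has been copied, and on a non-peeking read the
 * mark itself is skipped (and dropped) unless SOCK_URGINLINE is set.
 */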
2795 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2796 				  int flags, int copied)
2797 {
2798 	struct sk_buff *read_skb = NULL, *unread_skb = NULL;
2799 	struct unix_sock *u = unix_sk(sk);
2800 
2801 	if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
2802 		return skb;
2803 
2804 	spin_lock(&sk->sk_receive_queue.lock);
2805 
2806 	if (!unix_skb_len(skb)) {
2807 		if (copied && (!u->oob_skb || skb == u->oob_skb)) {
2808 			skb = NULL;
2809 		} else if (flags & MSG_PEEK) {
2810 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2811 		} else {
2812 			read_skb = skb;
2813 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2814 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2815 		}
2816 
2817 		if (!skb)
2818 			goto unlock;
2819 	}
2820 
2821 	if (skb != u->oob_skb)
2822 		goto unlock;
2823 
2824 	if (copied) {
2825 		skb = NULL;
2826 	} else if (!(flags & MSG_PEEK)) {
2827 		WRITE_ONCE(u->oob_skb, NULL);
2828 
2829 		if (!sock_flag(sk, SOCK_URGINLINE)) {
2830 			__skb_unlink(skb, &sk->sk_receive_queue);
2831 			unread_skb = skb;
2832 			skb = skb_peek(&sk->sk_receive_queue);
2833 		}
2834 	} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2835 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
2836 	}
2837 
2838 unlock:
2839 	spin_unlock(&sk->sk_receive_queue.lock);
2840 
2841 	consume_skb(read_skb);
2842 	kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2843 
2844 	return skb;
2845 }
2846 #endif
2847 
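/* read_skb() hook used by BPF skb redirection (e.g. sockmap): dequeue
 * exactly one skb and hand it to @recv_actor.  An skb carrying the OOB
 * mark is dropped here rather than delivered.
 */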
2848 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2849 {
2850 	struct sk_buff_head *queue = &sk->sk_receive_queue;
2851 	struct unix_sock *u = unix_sk(sk);
2852 	struct sk_buff *skb;
2853 	int err;
2854 
2855 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2856 		return -ENOTCONN;
2857 
2858 	err = sock_error(sk);
2859 	if (err)
2860 		return err;
2861 
2862 	mutex_lock(&u->iolock);
2863 	spin_lock(&queue->lock);
2864 
2865 	skb = __skb_dequeue(queue);
2866 	if (!skb) {
2867 		spin_unlock(&queue->lock);
2868 		mutex_unlock(&u->iolock);
2869 		return -EAGAIN;
2870 	}
2871 
2872 	WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
2873 
2874 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2875 	if (skb == u->oob_skb) {
2876 		WRITE_ONCE(u->oob_skb, NULL);
2877 		spin_unlock(&queue->lock);
2878 		mutex_unlock(&u->iolock);
2879 
2880 		kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2881 		return -EAGAIN;
2882 	}
2883 #endif
2884 
2885 	spin_unlock(&queue->lock);
2886 	mutex_unlock(&u->iolock);
2887 
2888 	return recv_actor(sk, skb);
2889 }
2890 
2891 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2892 				    bool freezable)
2893 {
2894 	int noblock = state->flags & MSG_DONTWAIT;
2895 	struct socket *sock = state->socket;
2896 	struct msghdr *msg = state->msg;
2897 	struct sock *sk = sock->sk;
2898 	size_t size = state->size;
2899 	int flags = state->flags;
2900 	bool check_creds = false;
2901 	struct scm_cookie scm;
2902 	unsigned int last_len;
2903 	struct unix_sock *u;
2904 	int copied = 0;
2905 	int err = 0;
2906 	long timeo;
2907 	int target;
2908 	int skip;
2909 
2910 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2911 		err = -EINVAL;
2912 		goto out;
2913 	}
2914 
2915 	if (unlikely(flags & MSG_OOB)) {
2916 		err = -EOPNOTSUPP;
2917 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2918 		err = unix_stream_recv_urg(state);
2919 #endif
2920 		goto out;
2921 	}
2922 
2923 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2924 	timeo = sock_rcvtimeo(sk, noblock);
2925 
2926 	memset(&scm, 0, sizeof(scm));
2927 
2928 	u = unix_sk(sk);
2929 
2930 	/* Lock the socket to prevent queue disordering
2931 	 * while we sleep copying data out to the message.
2932 	 */
2933 	mutex_lock(&u->iolock);
2934 
2935 	skip = max(sk_peek_offset(sk, flags), 0);
2936 
2937 	do {
2938 		struct sk_buff *skb, *last;
2939 		int chunk;
2940 
2941 redo:
2942 		unix_state_lock(sk);
2943 		if (sock_flag(sk, SOCK_DEAD)) {
2944 			err = -ECONNRESET;
2945 			goto unlock;
2946 		}
2947 		last = skb = skb_peek(&sk->sk_receive_queue);
2948 		last_len = last ? last->len : 0;
2949 
2950 again:
2951 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2952 		if (skb) {
2953 			skb = manage_oob(skb, sk, flags, copied);
2954 			if (!skb && copied) {
2955 				unix_state_unlock(sk);
2956 				break;
2957 			}
2958 		}
2959 #endif
2960 		if (skb == NULL) {
2961 			if (copied >= target)
2962 				goto unlock;
2963 
2964 			/*
2965 			 *	POSIX 1003.1g mandates this order.
2966 			 */
2967 
2968 			err = sock_error(sk);
2969 			if (err)
2970 				goto unlock;
2971 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2972 				goto unlock;
2973 
2974 			unix_state_unlock(sk);
2975 			if (!timeo) {
2976 				err = -EAGAIN;
2977 				break;
2978 			}
2979 
2980 			mutex_unlock(&u->iolock);
2981 
2982 			timeo = unix_stream_data_wait(sk, timeo, last,
2983 						      last_len, freezable);
2984 
2985 			if (signal_pending(current)) {
2986 				err = sock_intr_errno(timeo);
2987 				scm_destroy(&scm);
2988 				goto out;
2989 			}
2990 
2991 			mutex_lock(&u->iolock);
2992 			goto redo;
2993 unlock:
2994 			unix_state_unlock(sk);
2995 			break;
2996 		}
2997 
2998 		while (skip >= unix_skb_len(skb)) {
2999 			skip -= unix_skb_len(skb);
3000 			last = skb;
3001 			last_len = skb->len;
3002 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3003 			if (!skb)
3004 				goto again;
3005 		}
3006 
3007 		unix_state_unlock(sk);
3008 
3009 		if (check_creds) {
3010 			/* Never glue messages from different writers */
3011 			if (!unix_skb_scm_eq(skb, &scm))
3012 				break;
3013 		} else if (unix_may_passcred(sk)) {
3014 			/* Copy credentials */
3015 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
3016 			unix_set_secdata(&scm, skb);
3017 			check_creds = true;
3018 		}
3019 
3020 		/* Copy address just once */
3021 		if (msg && msg->msg_name) {
3022 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
3023 
3024 			unix_copy_addr(msg, skb->sk);
3025 			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
3026 							      &msg->msg_namelen);
3027 
3028 			sunaddr = NULL;
3029 		}
3030 
3031 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
3032 		chunk = state->recv_actor(skb, skip, chunk, state);
3033 		if (chunk < 0) {
3034 			if (copied == 0)
3035 				copied = -EFAULT;
3036 			break;
3037 		}
3038 		copied += chunk;
3039 		size -= chunk;
3040 
3041 		/* Mark read part of skb as used */
3042 		if (!(flags & MSG_PEEK)) {
3043 			UNIXCB(skb).consumed += chunk;
3044 
3045 			sk_peek_offset_bwd(sk, chunk);
3046 
3047 			if (UNIXCB(skb).fp) {
3048 				scm_stat_del(sk, skb);
3049 				unix_detach_fds(&scm, skb);
3050 			}
3051 
3052 			if (unix_skb_len(skb))
3053 				break;
3054 
3055 			spin_lock(&sk->sk_receive_queue.lock);
3056 			WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
3057 			__skb_unlink(skb, &sk->sk_receive_queue);
3058 			spin_unlock(&sk->sk_receive_queue.lock);
3059 
3060 			consume_skb(skb);
3061 
3062 			if (scm.fp)
3063 				break;
3064 		} else {
3065 			/* It is questionable; see the note in unix_dgram_recvmsg().
3066 			 */
3067 			if (UNIXCB(skb).fp)
3068 				unix_peek_fds(&scm, skb);
3069 
3070 			sk_peek_offset_fwd(sk, chunk);
3071 
3072 			if (UNIXCB(skb).fp)
3073 				break;
3074 
3075 			skip = 0;
3076 			last = skb;
3077 			last_len = skb->len;
3078 			unix_state_lock(sk);
3079 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3080 			if (skb)
3081 				goto again;
3082 			unix_state_unlock(sk);
3083 			break;
3084 		}
3085 	} while (size);
3086 
3087 	mutex_unlock(&u->iolock);
3088 	if (msg) {
3089 		scm_recv_unix(sock, msg, &scm, flags);
3090 
3091 		if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) {
3092 			msg->msg_inq = READ_ONCE(u->inq_len);
3093 			put_cmsg(msg, SOL_SOCKET, SCM_INQ,
3094 				 sizeof(msg->msg_inq), &msg->msg_inq);
3095 		}
3096 	} else {
3097 		scm_destroy(&scm);
3098 	}
3099 out:
3100 	return copied ? : err;
3101 }
3102 
3103 static int unix_stream_read_actor(struct sk_buff *skb,
3104 				  int skip, int chunk,
3105 				  struct unix_stream_read_state *state)
3106 {
3107 	int ret;
3108 
3109 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
3110 				    state->msg, chunk);
3111 	return ret ?: chunk;
3112 }
3113 
3114 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
3115 			  size_t size, int flags)
3116 {
3117 	struct unix_stream_read_state state = {
3118 		.recv_actor = unix_stream_read_actor,
3119 		.socket = sk->sk_socket,
3120 		.msg = msg,
3121 		.size = size,
3122 		.flags = flags
3123 	};
3124 
3125 	return unix_stream_read_generic(&state, true);
3126 }
3127 
3128 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
3129 			       size_t size, int flags)
3130 {
3131 	struct unix_stream_read_state state = {
3132 		.recv_actor = unix_stream_read_actor,
3133 		.socket = sock,
3134 		.msg = msg,
3135 		.size = size,
3136 		.flags = flags
3137 	};
3138 
3139 #ifdef CONFIG_BPF_SYSCALL
3140 	struct sock *sk = sock->sk;
3141 	const struct proto *prot = READ_ONCE(sk->sk_prot);
3142 
3143 	if (prot != &unix_stream_proto)
3144 		return prot->recvmsg(sk, msg, size, flags, NULL);
3145 #endif
3146 	return unix_stream_read_generic(&state, true);
3147 }
3148 
3149 static int unix_stream_splice_actor(struct sk_buff *skb,
3150 				    int skip, int chunk,
3151 				    struct unix_stream_read_state *state)
3152 {
3153 	return skb_splice_bits(skb, state->socket->sk,
3154 			       UNIXCB(skb).consumed + skip,
3155 			       state->pipe, chunk, state->splice_flags);
3156 }
3157 
3158 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
3159 				       struct pipe_inode_info *pipe,
3160 				       size_t size, unsigned int flags)
3161 {
3162 	struct unix_stream_read_state state = {
3163 		.recv_actor = unix_stream_splice_actor,
3164 		.socket = sock,
3165 		.pipe = pipe,
3166 		.size = size,
3167 		.splice_flags = flags,
3168 	};
3169 
3170 	if (unlikely(*ppos))
3171 		return -ESPIPE;
3172 
3173 	if (sock->file->f_flags & O_NONBLOCK ||
3174 	    flags & SPLICE_F_NONBLOCK)
3175 		state.flags = MSG_DONTWAIT;
3176 
3177 	return unix_stream_read_generic(&state, false);
3178 }
3179 
3180 static int unix_shutdown(struct socket *sock, int mode)
3181 {
3182 	struct sock *sk = sock->sk;
3183 	struct sock *other;
3184 
3185 	if (mode < SHUT_RD || mode > SHUT_RDWR)
3186 		return -EINVAL;
3187 	/* This maps:
3188 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
3189 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
3190 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3191 	 */
3192 	++mode;
3193 
3194 	unix_state_lock(sk);
3195 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
3196 	other = unix_peer(sk);
3197 	if (other)
3198 		sock_hold(other);
3199 	unix_state_unlock(sk);
3200 	sk->sk_state_change(sk);
3201 
3202 	if (other &&
3203 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3204 
3205 		int peer_mode = 0;
3206 		const struct proto *prot = READ_ONCE(other->sk_prot);
3207 
3208 		if (prot->unhash)
3209 			prot->unhash(other);
3210 		if (mode&RCV_SHUTDOWN)
3211 			peer_mode |= SEND_SHUTDOWN;
3212 		if (mode&SEND_SHUTDOWN)
3213 			peer_mode |= RCV_SHUTDOWN;
3214 		unix_state_lock(other);
3215 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
3216 		unix_state_unlock(other);
3217 		other->sk_state_change(other);
3218 		if (peer_mode == SHUTDOWN_MASK)
3219 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3220 		else if (peer_mode & RCV_SHUTDOWN)
3221 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3222 	}
3223 	if (other)
3224 		sock_put(other);
3225 
3226 	return 0;
3227 }
3228 
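/* SIOCINQ helper: bytes available for reading.  SOCK_STREAM uses the cached
 * inq_len counter, SOCK_SEQPACKET sums all queued messages, and SOCK_DGRAM
 * reports only the size of the next message.
 */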
3229 long unix_inq_len(struct sock *sk)
3230 {
3231 	struct sk_buff *skb;
3232 	long amount = 0;
3233 
3234 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
3235 		return -EINVAL;
3236 
3237 	if (sk->sk_type == SOCK_STREAM)
3238 		return READ_ONCE(unix_sk(sk)->inq_len);
3239 
3240 	spin_lock(&sk->sk_receive_queue.lock);
3241 	if (sk->sk_type == SOCK_SEQPACKET) {
3242 		skb_queue_walk(&sk->sk_receive_queue, skb)
3243 			amount += unix_skb_len(skb);
3244 	} else {
3245 		skb = skb_peek(&sk->sk_receive_queue);
3246 		if (skb)
3247 			amount = skb->len;
3248 	}
3249 	spin_unlock(&sk->sk_receive_queue.lock);
3250 
3251 	return amount;
3252 }
3253 EXPORT_SYMBOL_GPL(unix_inq_len);
3254 
3255 long unix_outq_len(struct sock *sk)
3256 {
3257 	return sk_wmem_alloc_get(sk);
3258 }
3259 EXPORT_SYMBOL_GPL(unix_outq_len);
3260 
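/* SIOCUNIXFILE: open the filesystem object this socket is bound to and
 * return it as a new O_PATH file descriptor.  Requires CAP_NET_ADMIN in the
 * socket's network namespace; unbound and abstract sockets yield -ENOENT.
 */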
3261 static int unix_open_file(struct sock *sk)
3262 {
3263 	struct path path;
3264 	struct file *f;
3265 	int fd;
3266 
3267 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3268 		return -EPERM;
3269 
3270 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3271 		return -ENOENT;
3272 
3273 	path = unix_sk(sk)->path;
3274 	if (!path.dentry)
3275 		return -ENOENT;
3276 
3277 	path_get(&path);
3278 
3279 	fd = get_unused_fd_flags(O_CLOEXEC);
3280 	if (fd < 0)
3281 		goto out;
3282 
3283 	f = dentry_open(&path, O_PATH, current_cred());
3284 	if (IS_ERR(f)) {
3285 		put_unused_fd(fd);
3286 		fd = PTR_ERR(f);
3287 		goto out;
3288 	}
3289 
3290 	fd_install(fd, f);
3291 out:
3292 	path_put(&path);
3293 
3294 	return fd;
3295 }
3296 
3297 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3298 {
3299 	struct sock *sk = sock->sk;
3300 	long amount = 0;
3301 	int err;
3302 
3303 	switch (cmd) {
3304 	case SIOCOUTQ:
3305 		amount = unix_outq_len(sk);
3306 		err = put_user(amount, (int __user *)arg);
3307 		break;
3308 	case SIOCINQ:
3309 		amount = unix_inq_len(sk);
3310 		if (amount < 0)
3311 			err = amount;
3312 		else
3313 			err = put_user(amount, (int __user *)arg);
3314 		break;
3315 	case SIOCUNIXFILE:
3316 		err = unix_open_file(sk);
3317 		break;
3318 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
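	/* SIOCATMARK reports whether the next byte to be read is the
	 * out-of-band mark, mirroring TCP semantics.  A sketch of the
	 * expected userspace usage:
	 *
	 *	int atmark;
	 *
	 *	if (!ioctl(fd, SIOCATMARK, &atmark) && atmark)
	 *		recv(fd, &oob, 1, MSG_OOB);
	 */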
3319 	case SIOCATMARK:
3320 		{
3321 			struct unix_sock *u = unix_sk(sk);
3322 			struct sk_buff *skb;
3323 			int answ = 0;
3324 
3325 			mutex_lock(&u->iolock);
3326 
3327 			skb = skb_peek(&sk->sk_receive_queue);
3328 			if (skb) {
3329 				struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
3330 				struct sk_buff *next_skb;
3331 
3332 				next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
3333 
3334 				if (skb == oob_skb ||
3335 				    (!unix_skb_len(skb) &&
3336 				     (!oob_skb || next_skb == oob_skb)))
3337 					answ = 1;
3338 			}
3339 
3340 			mutex_unlock(&u->iolock);
3341 
3342 			err = put_user(answ, (int __user *)arg);
3343 		}
3344 		break;
3345 #endif
3346 	default:
3347 		err = -ENOIOCTLCMD;
3348 		break;
3349 	}
3350 	return err;
3351 }
3352 
3353 #ifdef CONFIG_COMPAT
3354 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3355 {
3356 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3357 }
3358 #endif
3359 
3360 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3361 {
3362 	struct sock *sk = sock->sk;
3363 	unsigned char state;
3364 	__poll_t mask;
3365 	u8 shutdown;
3366 
3367 	sock_poll_wait(file, sock, wait);
3368 	mask = 0;
3369 	shutdown = READ_ONCE(sk->sk_shutdown);
3370 	state = READ_ONCE(sk->sk_state);
3371 
3372 	/* exceptional events? */
3373 	if (READ_ONCE(sk->sk_err))
3374 		mask |= EPOLLERR;
3375 	if (shutdown == SHUTDOWN_MASK)
3376 		mask |= EPOLLHUP;
3377 	if (shutdown & RCV_SHUTDOWN)
3378 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3379 
3380 	/* readable? */
3381 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3382 		mask |= EPOLLIN | EPOLLRDNORM;
3383 	if (sk_is_readable(sk))
3384 		mask |= EPOLLIN | EPOLLRDNORM;
3385 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3386 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3387 		mask |= EPOLLPRI;
3388 #endif
3389 
3390 	/* Connection-based sockets need to check for termination and startup */
3391 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3392 	    state == TCP_CLOSE)
3393 		mask |= EPOLLHUP;
3394 
3395 	/*
3396 	 * We also report the socket writable when the other side has shut
3397 	 * down the connection; this prevents stuck sockets.
3398 	 */
3399 	if (unix_writable(sk, state))
3400 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3401 
3402 	return mask;
3403 }
3404 
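/* Datagram/seqpacket variant of unix_poll(): besides the generic checks, the
 * socket is only reported writable while the connected peer's receive queue
 * has room; otherwise unix_dgram_peer_wake_me() hooks us onto the peer's
 * wait queue so the sender is woken once space becomes available.
 */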
3405 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3406 				    poll_table *wait)
3407 {
3408 	struct sock *sk = sock->sk, *other;
3409 	unsigned int writable;
3410 	unsigned char state;
3411 	__poll_t mask;
3412 	u8 shutdown;
3413 
3414 	sock_poll_wait(file, sock, wait);
3415 	mask = 0;
3416 	shutdown = READ_ONCE(sk->sk_shutdown);
3417 	state = READ_ONCE(sk->sk_state);
3418 
3419 	/* exceptional events? */
3420 	if (READ_ONCE(sk->sk_err) ||
3421 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3422 		mask |= EPOLLERR |
3423 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3424 
3425 	if (shutdown & RCV_SHUTDOWN)
3426 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3427 	if (shutdown == SHUTDOWN_MASK)
3428 		mask |= EPOLLHUP;
3429 
3430 	/* readable? */
3431 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3432 		mask |= EPOLLIN | EPOLLRDNORM;
3433 	if (sk_is_readable(sk))
3434 		mask |= EPOLLIN | EPOLLRDNORM;
3435 
3436 	/* Connection-based sockets need to check for termination and startup */
3437 	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3438 		mask |= EPOLLHUP;
3439 
3440 	/* No write status requested, avoid expensive OUT tests. */
3441 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3442 		return mask;
3443 
3444 	writable = unix_writable(sk, state);
3445 	if (writable) {
3446 		unix_state_lock(sk);
3447 
3448 		other = unix_peer(sk);
3449 		if (other && unix_peer(other) != sk &&
3450 		    unix_recvq_full_lockless(other) &&
3451 		    unix_dgram_peer_wake_me(sk, other))
3452 			writable = 0;
3453 
3454 		unix_state_unlock(sk);
3455 	}
3456 
3457 	if (writable)
3458 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3459 	else
3460 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3461 
3462 	return mask;
3463 }
3464 
3465 #ifdef CONFIG_PROC_FS
3466 
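/* The seq_file position encodes both a hash bucket and a 1-based offset
 * within that bucket: the upper bits select the bucket and the lower
 * BUCKET_SPACE bits the offset (position 0 is reserved for SEQ_START_TOKEN).
 */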
3467 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3468 
3469 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3470 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3471 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3472 
3473 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3474 {
3475 	unsigned long offset = get_offset(*pos);
3476 	unsigned long bucket = get_bucket(*pos);
3477 	unsigned long count = 0;
3478 	struct sock *sk;
3479 
3480 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3481 	     sk; sk = sk_next(sk)) {
3482 		if (++count == offset)
3483 			break;
3484 	}
3485 
3486 	return sk;
3487 }
3488 
3489 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3490 {
3491 	unsigned long bucket = get_bucket(*pos);
3492 	struct net *net = seq_file_net(seq);
3493 	struct sock *sk;
3494 
3495 	while (bucket < UNIX_HASH_SIZE) {
3496 		spin_lock(&net->unx.table.locks[bucket]);
3497 
3498 		sk = unix_from_bucket(seq, pos);
3499 		if (sk)
3500 			return sk;
3501 
3502 		spin_unlock(&net->unx.table.locks[bucket]);
3503 
3504 		*pos = set_bucket_offset(++bucket, 1);
3505 	}
3506 
3507 	return NULL;
3508 }
3509 
3510 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3511 				  loff_t *pos)
3512 {
3513 	unsigned long bucket = get_bucket(*pos);
3514 
3515 	sk = sk_next(sk);
3516 	if (sk)
3517 		return sk;
3518 
3520 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3521 
3522 	*pos = set_bucket_offset(++bucket, 1);
3523 
3524 	return unix_get_first(seq, pos);
3525 }
3526 
3527 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3528 {
3529 	if (!*pos)
3530 		return SEQ_START_TOKEN;
3531 
3532 	return unix_get_first(seq, pos);
3533 }
3534 
3535 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3536 {
3537 	++*pos;
3538 
3539 	if (v == SEQ_START_TOKEN)
3540 		return unix_get_first(seq, pos);
3541 
3542 	return unix_get_next(seq, v, pos);
3543 }
3544 
3545 static void unix_seq_stop(struct seq_file *seq, void *v)
3546 {
3547 	struct sock *sk = v;
3548 
3549 	if (sk)
3550 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3551 }
3552 
3553 static int unix_seq_show(struct seq_file *seq, void *v)
3554 {
3555 
3556 	if (v == SEQ_START_TOKEN)
3557 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3558 			 "Inode Path\n");
3559 	else {
3560 		struct sock *s = v;
3561 		struct unix_sock *u = unix_sk(s);
3562 		unix_state_lock(s);
3563 
3564 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3565 			s,
3566 			refcount_read(&s->sk_refcnt),
3567 			0,
3568 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3569 			s->sk_type,
3570 			s->sk_socket ?
3571 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3572 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3573 			sock_i_ino(s));
3574 
3575 		if (u->addr) {	/* under a hash table lock here */
3576 			int i, len;
3577 			seq_putc(seq, ' ');
3578 
3579 			i = 0;
3580 			len = u->addr->len -
3581 				offsetof(struct sockaddr_un, sun_path);
3582 			if (u->addr->name->sun_path[0]) {
3583 				len--;
3584 			} else {
3585 				seq_putc(seq, '@');
3586 				i++;
3587 			}
3588 			for ( ; i < len; i++)
3589 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3590 					 '@');
3591 		}
3592 		unix_state_unlock(s);
3593 		seq_putc(seq, '\n');
3594 	}
3595 
3596 	return 0;
3597 }
3598 
3599 static const struct seq_operations unix_seq_ops = {
3600 	.start  = unix_seq_start,
3601 	.next   = unix_seq_next,
3602 	.stop   = unix_seq_stop,
3603 	.show   = unix_seq_show,
3604 };
3605 
3606 #ifdef CONFIG_BPF_SYSCALL
3607 struct bpf_unix_iter_state {
3608 	struct seq_net_private p;
3609 	unsigned int cur_sk;
3610 	unsigned int end_sk;
3611 	unsigned int max_sk;
3612 	struct sock **batch;
3613 	bool st_bucket_done;
3614 };
3615 
3616 struct bpf_iter__unix {
3617 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3618 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3619 	uid_t uid __aligned(8);
3620 };
3621 
3622 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3623 			      struct unix_sock *unix_sk, uid_t uid)
3624 {
3625 	struct bpf_iter__unix ctx;
3626 
3627 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3628 	ctx.meta = meta;
3629 	ctx.unix_sk = unix_sk;
3630 	ctx.uid = uid;
3631 	return bpf_iter_run_prog(prog, &ctx);
3632 }
3633 
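/* Grab a reference on every socket left in the current hash bucket, starting
 * at @start_sk, storing at most iter->max_sk of them in the batch.  The
 * return value is how many sockets the bucket actually held, so the caller
 * can detect that the batch array was too small and must be resized.
 */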
3634 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3636 {
3637 	struct bpf_unix_iter_state *iter = seq->private;
3638 	unsigned int expected = 1;
3639 	struct sock *sk;
3640 
3641 	sock_hold(start_sk);
3642 	iter->batch[iter->end_sk++] = start_sk;
3643 
3644 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3645 		if (iter->end_sk < iter->max_sk) {
3646 			sock_hold(sk);
3647 			iter->batch[iter->end_sk++] = sk;
3648 		}
3649 
3650 		expected++;
3651 	}
3652 
3653 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3654 
3655 	return expected;
3656 }
3657 
3658 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3659 {
3660 	while (iter->cur_sk < iter->end_sk)
3661 		sock_put(iter->batch[iter->cur_sk++]);
3662 }
3663 
3664 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3665 				       unsigned int new_batch_sz)
3666 {
3667 	struct sock **new_batch;
3668 
3669 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3670 			     GFP_USER | __GFP_NOWARN);
3671 	if (!new_batch)
3672 		return -ENOMEM;
3673 
3674 	bpf_iter_unix_put_batch(iter);
3675 	kvfree(iter->batch);
3676 	iter->batch = new_batch;
3677 	iter->max_sk = new_batch_sz;
3678 
3679 	return 0;
3680 }
3681 
3682 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3683 					loff_t *pos)
3684 {
3685 	struct bpf_unix_iter_state *iter = seq->private;
3686 	unsigned int expected;
3687 	bool resized = false;
3688 	struct sock *sk;
3689 
3690 	if (iter->st_bucket_done)
3691 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3692 
3693 again:
3694 	/* Get a new batch */
3695 	iter->cur_sk = 0;
3696 	iter->end_sk = 0;
3697 
3698 	sk = unix_get_first(seq, pos);
3699 	if (!sk)
3700 		return NULL; /* Done */
3701 
3702 	expected = bpf_iter_unix_hold_batch(seq, sk);
3703 
3704 	if (iter->end_sk == expected) {
3705 		iter->st_bucket_done = true;
3706 		return sk;
3707 	}
3708 
3709 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3710 		resized = true;
3711 		goto again;
3712 	}
3713 
3714 	return sk;
3715 }
3716 
3717 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3718 {
3719 	if (!*pos)
3720 		return SEQ_START_TOKEN;
3721 
3722 	/* bpf iter does not support lseek, so it always
3723 	 * continues from where it was stop()-ped.
3724 	 */
3725 	return bpf_iter_unix_batch(seq, pos);
3726 }
3727 
3728 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3729 {
3730 	struct bpf_unix_iter_state *iter = seq->private;
3731 	struct sock *sk;
3732 
3733 	/* Whenever seq_next() is called, the sock at iter->cur_sk
3734 	 * has been handled by seq_show(), so advance to the next
3735 	 * sk in the batch.
3736 	 */
3737 	if (iter->cur_sk < iter->end_sk)
3738 		sock_put(iter->batch[iter->cur_sk++]);
3739 
3740 	++*pos;
3741 
3742 	if (iter->cur_sk < iter->end_sk)
3743 		sk = iter->batch[iter->cur_sk];
3744 	else
3745 		sk = bpf_iter_unix_batch(seq, pos);
3746 
3747 	return sk;
3748 }
3749 
3750 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3751 {
3752 	struct bpf_iter_meta meta;
3753 	struct bpf_prog *prog;
3754 	struct sock *sk = v;
3755 	uid_t uid;
3756 	bool slow;
3757 	int ret;
3758 
3759 	if (v == SEQ_START_TOKEN)
3760 		return 0;
3761 
3762 	slow = lock_sock_fast(sk);
3763 
3764 	if (unlikely(sk_unhashed(sk))) {
3765 		ret = SEQ_SKIP;
3766 		goto unlock;
3767 	}
3768 
3769 	uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
3770 	meta.seq = seq;
3771 	prog = bpf_iter_get_info(&meta, false);
3772 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3773 unlock:
3774 	unlock_sock_fast(sk, slow);
3775 	return ret;
3776 }
3777 
3778 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3779 {
3780 	struct bpf_unix_iter_state *iter = seq->private;
3781 	struct bpf_iter_meta meta;
3782 	struct bpf_prog *prog;
3783 
3784 	if (!v) {
3785 		meta.seq = seq;
3786 		prog = bpf_iter_get_info(&meta, true);
3787 		if (prog)
3788 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3789 	}
3790 
3791 	if (iter->cur_sk < iter->end_sk)
3792 		bpf_iter_unix_put_batch(iter);
3793 }
3794 
3795 static const struct seq_operations bpf_iter_unix_seq_ops = {
3796 	.start	= bpf_iter_unix_seq_start,
3797 	.next	= bpf_iter_unix_seq_next,
3798 	.stop	= bpf_iter_unix_seq_stop,
3799 	.show	= bpf_iter_unix_seq_show,
3800 };
3801 #endif
3802 #endif
3803 
3804 static const struct net_proto_family unix_family_ops = {
3805 	.family = PF_UNIX,
3806 	.create = unix_create,
3807 	.owner	= THIS_MODULE,
3808 };
3809 
3810 
3812 {
3813 	int i;
3814 
3815 	net->unx.sysctl_max_dgram_qlen = 10;
3816 	if (unix_sysctl_register(net))
3817 		goto out;
3818 
3819 #ifdef CONFIG_PROC_FS
3820 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3821 			     sizeof(struct seq_net_private)))
3822 		goto err_sysctl;
3823 #endif
3824 
3825 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3826 					      sizeof(spinlock_t), GFP_KERNEL);
3827 	if (!net->unx.table.locks)
3828 		goto err_proc;
3829 
3830 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3831 						sizeof(struct hlist_head),
3832 						GFP_KERNEL);
3833 	if (!net->unx.table.buckets)
3834 		goto free_locks;
3835 
3836 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3837 		spin_lock_init(&net->unx.table.locks[i]);
3838 		lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
3839 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3840 	}
3841 
3842 	return 0;
3843 
3844 free_locks:
3845 	kvfree(net->unx.table.locks);
3846 err_proc:
3847 #ifdef CONFIG_PROC_FS
3848 	remove_proc_entry("unix", net->proc_net);
3849 err_sysctl:
3850 #endif
3851 	unix_sysctl_unregister(net);
3852 out:
3853 	return -ENOMEM;
3854 }
3855 
3856 static void __net_exit unix_net_exit(struct net *net)
3857 {
3858 	kvfree(net->unx.table.buckets);
3859 	kvfree(net->unx.table.locks);
3860 	unix_sysctl_unregister(net);
3861 	remove_proc_entry("unix", net->proc_net);
3862 }
3863 
3864 static struct pernet_operations unix_net_ops = {
3865 	.init = unix_net_init,
3866 	.exit = unix_net_exit,
3867 };
3868 
3869 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3870 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3871 		     struct unix_sock *unix_sk, uid_t uid)
3872 
3873 #define INIT_BATCH_SZ 16
3874 
3875 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3876 {
3877 	struct bpf_unix_iter_state *iter = priv_data;
3878 	int err;
3879 
3880 	err = bpf_iter_init_seq_net(priv_data, aux);
3881 	if (err)
3882 		return err;
3883 
3884 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3885 	if (err) {
3886 		bpf_iter_fini_seq_net(priv_data);
3887 		return err;
3888 	}
3889 
3890 	return 0;
3891 }
3892 
3893 static void bpf_iter_fini_unix(void *priv_data)
3894 {
3895 	struct bpf_unix_iter_state *iter = priv_data;
3896 
3897 	bpf_iter_fini_seq_net(priv_data);
3898 	kvfree(iter->batch);
3899 }
3900 
3901 static const struct bpf_iter_seq_info unix_seq_info = {
3902 	.seq_ops		= &bpf_iter_unix_seq_ops,
3903 	.init_seq_private	= bpf_iter_init_unix,
3904 	.fini_seq_private	= bpf_iter_fini_unix,
3905 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3906 };
3907 
3908 static const struct bpf_func_proto *
3909 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3910 			     const struct bpf_prog *prog)
3911 {
3912 	switch (func_id) {
3913 	case BPF_FUNC_setsockopt:
3914 		return &bpf_sk_setsockopt_proto;
3915 	case BPF_FUNC_getsockopt:
3916 		return &bpf_sk_getsockopt_proto;
3917 	default:
3918 		return NULL;
3919 	}
3920 }
3921 
3922 static struct bpf_iter_reg unix_reg_info = {
3923 	.target			= "unix",
3924 	.ctx_arg_info_size	= 1,
3925 	.ctx_arg_info		= {
3926 		{ offsetof(struct bpf_iter__unix, unix_sk),
3927 		  PTR_TO_BTF_ID_OR_NULL },
3928 	},
3929 	.get_func_proto         = bpf_iter_unix_get_func_proto,
3930 	.seq_info		= &unix_seq_info,
3931 };
3932 
3933 static void __init bpf_iter_register(void)
3934 {
3935 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3936 	if (bpf_iter_reg_target(&unix_reg_info))
3937 		pr_warn("Warning: could not register bpf iterator unix\n");
3938 }
3939 #endif
3940 
3941 static int __init af_unix_init(void)
3942 {
3943 	int i, rc = -1;
3944 
3945 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3946 
3947 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3948 		spin_lock_init(&bsd_socket_locks[i]);
3949 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3950 	}
3951 
3952 	rc = proto_register(&unix_dgram_proto, 1);
3953 	if (rc != 0) {
3954 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3955 		goto out;
3956 	}
3957 
3958 	rc = proto_register(&unix_stream_proto, 1);
3959 	if (rc != 0) {
3960 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3961 		proto_unregister(&unix_dgram_proto);
3962 		goto out;
3963 	}
3964 
3965 	sock_register(&unix_family_ops);
3966 	register_pernet_subsys(&unix_net_ops);
3967 	unix_bpf_build_proto();
3968 
3969 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3970 	bpf_iter_register();
3971 #endif
3972 
3973 out:
3974 	return rc;
3975 }
3976 
3977 /* Later than subsys_initcall() because we depend on stuff initialised there */
3978 fs_initcall(af_unix_init);
3979