xref: /linux/net/unix/af_unix.c (revision 8d72997dab65b1e9e3220302e26eaecd9b99c02f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko EiBfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/bpf-cgroup.h>
81 #include <linux/btf_ids.h>
82 #include <linux/dcache.h>
83 #include <linux/errno.h>
84 #include <linux/fcntl.h>
85 #include <linux/file.h>
86 #include <linux/filter.h>
87 #include <linux/fs.h>
88 #include <linux/fs_struct.h>
89 #include <linux/init.h>
90 #include <linux/kernel.h>
91 #include <linux/mount.h>
92 #include <linux/namei.h>
93 #include <linux/net.h>
94 #include <linux/pidfs.h>
95 #include <linux/poll.h>
96 #include <linux/proc_fs.h>
97 #include <linux/sched/signal.h>
98 #include <linux/security.h>
99 #include <linux/seq_file.h>
100 #include <linux/skbuff.h>
101 #include <linux/slab.h>
102 #include <linux/socket.h>
103 #include <linux/splice.h>
104 #include <linux/string.h>
105 #include <linux/uaccess.h>
106 #include <net/af_unix.h>
107 #include <net/net_namespace.h>
108 #include <net/scm.h>
109 #include <net/tcp_states.h>
110 #include <uapi/linux/sockios.h>
111 #include <uapi/linux/termios.h>
112 
113 #include "af_unix.h"
114 
/* Number of live AF_UNIX sockets: incremented in unix_create1() and
 * decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
/* Hash table of sockets bound to a filesystem object.  Lookups in
 * unix_find_socket_byinode() pick the bucket with unix_bsd_hash(); every
 * bucket is protected by the spinlock at the same index.
 */
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
118 
119 /* SMP locking strategy:
120  *    hash table is protected with spinlock.
121  *    each socket state is protected by separate spinlock.
122  */
123 #ifdef CONFIG_PROVE_LOCKING
/* Three-way comparison: evaluates to -1, 0 or 1 when l is below, equal to
 * or above r.  Each argument is evaluated twice.
 */
#define cmp_ptr(l, r)	(((l) > (r)) - ((l) < (r)))
125 
/* Lock comparison callback that orders two locks purely by the address of
 * their lockdep_map.
 *
 * NOTE(review): presumably registered for the hash table locks; the
 * registration is outside this part of the file.
 *
 * Return: -1, 0 or 1 when @a is below, equal to or above @b.
 */
static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
				  const struct lockdep_map *b)
{
	int order = cmp_ptr(a, b);

	return order;
}
131 
132 static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
133 				  const struct lockdep_map *_b)
134 {
135 	const struct unix_sock *a, *b;
136 
137 	a = container_of(_a, struct unix_sock, lock.dep_map);
138 	b = container_of(_b, struct unix_sock, lock.dep_map);
139 
140 	if (a->sk.sk_state == TCP_LISTEN) {
141 		/* unix_stream_connect(): Before the 2nd unix_state_lock(),
142 		 *
143 		 *   1. a is TCP_LISTEN.
144 		 *   2. b is not a.
145 		 *   3. concurrent connect(b -> a) must fail.
146 		 *
147 		 * Except for 2. & 3., the b's state can be any possible
148 		 * value due to concurrent connect() or listen().
149 		 *
150 		 * 2. is detected in debug_spin_lock_before(), and 3. cannot
151 		 * be expressed as lock_cmp_fn.
152 		 */
153 		switch (b->sk.sk_state) {
154 		case TCP_CLOSE:
155 		case TCP_ESTABLISHED:
156 		case TCP_LISTEN:
157 			return -1;
158 		default:
159 			/* Invalid case. */
160 			return 0;
161 		}
162 	}
163 
164 	/* Should never happen.  Just to be symmetric. */
165 	if (b->sk.sk_state == TCP_LISTEN) {
166 		switch (b->sk.sk_state) {
167 		case TCP_CLOSE:
168 		case TCP_ESTABLISHED:
169 			return 1;
170 		default:
171 			return 0;
172 		}
173 	}
174 
175 	/* unix_state_double_lock(): ascending address order. */
176 	return cmp_ptr(a, b);
177 }
178 
179 static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
180 				  const struct lockdep_map *_b)
181 {
182 	const struct sock *a, *b;
183 
184 	a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
185 	b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
186 
187 	/* unix_collect_skb(): listener -> embryo order. */
188 	if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
189 		return -1;
190 
191 	/* Should never happen.  Just to be symmetric. */
192 	if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
193 		return 1;
194 
195 	return 0;
196 }
197 #endif
198 
199 static unsigned int unix_unbound_hash(struct sock *sk)
200 {
201 	unsigned long hash = (unsigned long)sk;
202 
203 	hash ^= hash >> 16;
204 	hash ^= hash >> 8;
205 	hash ^= sk->sk_type;
206 
207 	return hash & UNIX_HASH_MOD;
208 }
209 
210 static unsigned int unix_bsd_hash(struct inode *i)
211 {
212 	return i->i_ino & UNIX_HASH_MOD;
213 }
214 
215 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
216 				       int addr_len, int type)
217 {
218 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
219 	unsigned int hash;
220 
221 	hash = (__force unsigned int)csum_fold(csum);
222 	hash ^= hash >> 8;
223 	hash ^= type;
224 
225 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
226 }
227 
228 static void unix_table_double_lock(struct net *net,
229 				   unsigned int hash1, unsigned int hash2)
230 {
231 	if (hash1 == hash2) {
232 		spin_lock(&net->unx.table.locks[hash1]);
233 		return;
234 	}
235 
236 	if (hash1 > hash2)
237 		swap(hash1, hash2);
238 
239 	spin_lock(&net->unx.table.locks[hash1]);
240 	spin_lock(&net->unx.table.locks[hash2]);
241 }
242 
243 static void unix_table_double_unlock(struct net *net,
244 				     unsigned int hash1, unsigned int hash2)
245 {
246 	if (hash1 == hash2) {
247 		spin_unlock(&net->unx.table.locks[hash1]);
248 		return;
249 	}
250 
251 	spin_unlock(&net->unx.table.locks[hash1]);
252 	spin_unlock(&net->unx.table.locks[hash2]);
253 }
254 
255 #ifdef CONFIG_SECURITY_NETWORK
/* Copy the security ID from the scm cookie into the skb's UNIXCB.
 * Note the direction: despite the "get" in the name, the skb is written.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}
260 
/* Copy the security ID stored in the skb's UNIXCB into the scm cookie.
 * Note the direction: despite the "set" in the name, the skb is only read.
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}
265 
/* Return true when the scm cookie and the skb carry the same security ID. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
270 #else
/* !CONFIG_SECURITY_NETWORK: there is no security ID to store; no-op. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
273 
/* !CONFIG_SECURITY_NETWORK: there is no security ID to load; no-op. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
276 
/* !CONFIG_SECURITY_NETWORK: with no security IDs to compare, every pair is
 * reported as equal.
 */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
281 #endif /* CONFIG_SECURITY_NETWORK */
282 
/* Decide whether @sk may send to @osk.
 *
 * Return: non-zero when @osk has no peer at all or when its peer is @sk,
 * zero when @osk is connected to some other socket.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	if (!unix_peer(osk))
		return 1;

	return unix_peer(osk) == sk;
}
287 
288 static inline int unix_recvq_full_lockless(const struct sock *sk)
289 {
290 	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
291 }
292 
293 struct sock *unix_peer_get(struct sock *s)
294 {
295 	struct sock *peer;
296 
297 	unix_state_lock(s);
298 	peer = unix_peer(s);
299 	if (peer)
300 		sock_hold(peer);
301 	unix_state_unlock(s);
302 	return peer;
303 }
304 EXPORT_SYMBOL_GPL(unix_peer_get);
305 
306 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
307 					     int addr_len)
308 {
309 	struct unix_address *addr;
310 
311 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
312 	if (!addr)
313 		return NULL;
314 
315 	refcount_set(&addr->refcnt, 1);
316 	addr->len = addr_len;
317 	memcpy(addr->name, sunaddr, addr_len);
318 
319 	return addr;
320 }
321 
322 static inline void unix_release_addr(struct unix_address *addr)
323 {
324 	if (refcount_dec_and_test(&addr->refcnt))
325 		kfree(addr);
326 }
327 
/*
 *	Check unix socket name:
 *		- the length must cover at least one byte of sun_path and
 *		  must not exceed sizeof(struct sockaddr_un).
 *		- the family must be AF_UNIX.
 *
 *	A name starting with a non-zero byte is a filesystem object and is
 *	NUL terminated later (see unix_mkname_bsd()); a name starting with
 *	a zero byte is an abstract name.  Neither is checked here.
 *
 *	Returns 0 for an acceptable name, -EINVAL otherwise.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path))
		return -EINVAL;

	if (addr_len > sizeof(*sunaddr))
		return -EINVAL;

	return sunaddr->sun_family == AF_UNIX ? 0 : -EINVAL;
}
346 
/* NUL-terminate the pathname held in @sunaddr and compute the length of
 * the resulting address.
 *
 * @sunaddr:  address buffer, accessed here as a struct sockaddr_storage.
 * @addr_len: number of valid bytes in @sunaddr as supplied by the caller.
 *
 * One byte is written at data index (addr_len - offset), i.e. directly
 * behind the bytes supplied by the caller.
 *
 * Return: offset of sun_path + strlen(path) + 1, so the terminating NUL is
 * counted.
 */
static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
	short offset = offsetof(struct sockaddr_storage, __data);

	/* The two views of the buffer must agree on where the path starts. */
	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));

	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.  Also, we must terminate sun_path for strlen() in
	 * getname_kernel().
	 */
	addr->__data[addr_len - offset] = 0;

	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
	 * know the actual buffer.
	 */
	return strlen(addr->__data) + offset + 1;
}
371 
/* Unlink @sk from its hash bucket via sk_del_node_init().  No lock is taken
 * here; unix_remove_socket() is the variant that takes the bucket lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
376 
/* Link @sk into the per-netns bucket selected by sk->sk_hash.  No lock is
 * taken here.  A debug warning fires once if @sk is still hashed.
 */
static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}
382 
/* Give @sk the address @addr and move it to bucket @hash.
 *
 * The socket is unhashed first, the address pointer is published with
 * release semantics, and only then is sk_hash updated and the socket
 * re-inserted.  No lock is taken here.
 *
 * NOTE(review): callers presumably hold the table locks of both the old and
 * the new bucket (see unix_table_double_lock()) - confirm at the call sites.
 */
static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}
392 
393 static void unix_remove_socket(struct net *net, struct sock *sk)
394 {
395 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
396 	__unix_remove_socket(sk);
397 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
398 }
399 
400 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
401 {
402 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
403 	__unix_insert_socket(net, sk);
404 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
405 }
406 
407 static void unix_insert_bsd_socket(struct sock *sk)
408 {
409 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
410 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
411 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
412 }
413 
414 static void unix_remove_bsd_socket(struct sock *sk)
415 {
416 	if (!hlist_unhashed(&sk->sk_bind_node)) {
417 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
418 		__sk_del_bind_node(sk);
419 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
420 
421 		sk_node_init(&sk->sk_bind_node);
422 	}
423 }
424 
425 static struct sock *__unix_find_socket_byname(struct net *net,
426 					      struct sockaddr_un *sunname,
427 					      int len, unsigned int hash)
428 {
429 	struct sock *s;
430 
431 	sk_for_each(s, &net->unx.table.buckets[hash]) {
432 		struct unix_sock *u = unix_sk(s);
433 
434 		if (u->addr->len == len &&
435 		    !memcmp(u->addr->name, sunname, len))
436 			return s;
437 	}
438 	return NULL;
439 }
440 
441 static inline struct sock *unix_find_socket_byname(struct net *net,
442 						   struct sockaddr_un *sunname,
443 						   int len, unsigned int hash)
444 {
445 	struct sock *s;
446 
447 	spin_lock(&net->unx.table.locks[hash]);
448 	s = __unix_find_socket_byname(net, sunname, len, hash);
449 	if (s)
450 		sock_hold(s);
451 	spin_unlock(&net->unx.table.locks[hash]);
452 	return s;
453 }
454 
455 static struct sock *unix_find_socket_byinode(struct inode *i)
456 {
457 	unsigned int hash = unix_bsd_hash(i);
458 	struct sock *s;
459 
460 	spin_lock(&bsd_socket_locks[hash]);
461 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
462 		struct dentry *dentry = unix_sk(s)->path.dentry;
463 
464 		if (dentry && d_backing_inode(dentry) == i) {
465 			sock_hold(s);
466 			spin_unlock(&bsd_socket_locks[hash]);
467 			return s;
468 		}
469 	}
470 	spin_unlock(&bsd_socket_locks[hash]);
471 	return NULL;
472 }
473 
474 /* Support code for asymmetrically connected dgram sockets
475  *
476  * If a datagram socket is connected to a socket not itself connected
477  * to the first socket (eg, /dev/log), clients may only enqueue more
478  * messages if the present receive queue of the server socket is not
479  * "too large". This means there's a second writeability condition
480  * poll and sendmsg need to test. The dgram recv code will do a wake
481  * up on the peer_wait wait queue of a socket upon reception of a
482  * datagram which needs to be propagated to sleeping would-be writers
483  * since these might not have sent anything so far. This can't be
484  * accomplished via poll_wait because the lifetime of the server
485  * socket might be less than that of its clients if these break their
486  * association with it or if the server socket is closed while clients
487  * are still connected to it and there's no way to inform "a polling
488  * implementation" that it should let go of a certain wait queue
489  *
490  * In order to propagate a wake up, a wait_queue_entry_t of the client
491  * socket is enqueued on the peer_wait queue of the server socket
492  * whose wake function does a wake_up on the ordinary client socket
493  * wait queue. This connection is established whenever a write (or
494  * poll for write) hit the flow control condition and broken when the
495  * association to the server socket is dissolved or after a wake up
496  * was relayed.
497  */
498 
/* Wake callback of a client's peer_wake wait-queue entry.
 *
 * @q is the peer_wake entry embedded in the client's unix_sock.  The entry
 * is unlinked from the peer_wait queue of the socket recorded in
 * peer_wake.private, the association is cleared, and the wake-up is
 * forwarded to the client's own wait queue using the poll key from @key.
 * @mode and @flags are unused.
 *
 * NOTE(review): __remove_wait_queue() does no locking of its own, so this
 * presumably runs with the peer_wait queue lock held by the waker - confirm.
 *
 * Return: always 0.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	/* One-shot: break the association before relaying. */
	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
518 
519 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
520 {
521 	struct unix_sock *u, *u_other;
522 	int rc;
523 
524 	u = unix_sk(sk);
525 	u_other = unix_sk(other);
526 	rc = 0;
527 	spin_lock(&u_other->peer_wait.lock);
528 
529 	if (!u->peer_wake.private) {
530 		u->peer_wake.private = other;
531 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
532 
533 		rc = 1;
534 	}
535 
536 	spin_unlock(&u_other->peer_wait.lock);
537 	return rc;
538 }
539 
540 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
541 					    struct sock *other)
542 {
543 	struct unix_sock *u, *u_other;
544 
545 	u = unix_sk(sk);
546 	u_other = unix_sk(other);
547 	spin_lock(&u_other->peer_wait.lock);
548 
549 	if (u->peer_wake.private == other) {
550 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
551 		u->peer_wake.private = NULL;
552 	}
553 
554 	spin_unlock(&u_other->peer_wait.lock);
555 }
556 
/* Break the peer_wake association between @sk and @other, then wake anyone
 * sleeping on @sk's own wait queue with the writable poll events.
 */
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
566 
/* Arrange for @sk to be woken when @other makes room in its receive queue.
 *
 * preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Return: 1 when @other's receive queue is full and @other is not
 * SOCK_DEAD; @sk then stays registered on @other's peer_wait queue.
 * Otherwise 0, and a registration made by this call is undone again.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	/* Register first, check afterwards. */
	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	/* Only undo an association that this call created. */
	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
590 
591 static int unix_writable(const struct sock *sk, unsigned char state)
592 {
593 	return state != TCP_LISTEN &&
594 		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
595 }
596 
597 static void unix_write_space(struct sock *sk)
598 {
599 	struct socket_wq *wq;
600 
601 	rcu_read_lock();
602 	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
603 		wq = rcu_dereference(sk->sk_wq);
604 		if (skwq_has_sleeper(wq))
605 			wake_up_interruptible_sync_poll(&wq->wait,
606 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
607 		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
608 	}
609 	rcu_read_unlock();
610 }
611 
612 /* When dgram socket disconnects (or changes its peer), we clear its receive
613  * queue of packets arrived from previous peer. First, it allows to do
614  * flow control based only on wmem_alloc; second, sk connected to peer
615  * may receive messages only from that peer. */
616 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
617 {
618 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
619 		skb_queue_purge_reason(&sk->sk_receive_queue,
620 				       SKB_DROP_REASON_UNIX_DISCONNECT);
621 
622 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
623 
624 		/* If one link of bidirectional dgram pipe is disconnected,
625 		 * we signal error. Messages are lost. Do not make this,
626 		 * when peer was not connected to us.
627 		 */
628 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
629 			WRITE_ONCE(other->sk_err, ECONNRESET);
630 			sk_error_report(other);
631 		}
632 	}
633 }
634 
/* Final teardown of a unix socket.
 *
 * Purges whatever is left on the receive queue, sanity-checks that the
 * socket has no write memory outstanding, is unhashed and is detached from
 * its struct socket, then releases the address and updates the socket
 * counters.  A socket that is not SOCK_DEAD is reported and left alone
 * after the purge.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		/* Bail out before touching the address or the counters. */
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
659 
/* Number of bytes of @skb that have not been consumed yet: the skb length
 * minus the consumed count kept in its UNIXCB.
 */
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}
664 
/* Tear down a unix socket: unhash it, orphan it, notify and drop its peer,
 * flush its receive queue, release its path and drop a reference on it.
 *
 * @sk:      socket being released.
 * @embrion: passed as 1 by the recursive call below for sockets referenced
 *           by skbs queued on a listening socket; a non-zero value forces
 *           ECONNRESET on the peer of a stream/seqpacket socket.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	/* Make the socket unreachable through both hash tables first. */
	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	/* Take over the path; it is put after the state lock is dropped. */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	/* Remember the old state: a listener needs extra work below. */
	state = sk->sk_state;
	WRITE_ONCE(sk->sk_state, TCP_CLOSE);

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	u->oob_skb = NULL;
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			/* This skb shadows the function-level one; it is only
			 * used to tell whether unread data is still queued.
			 */
			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
			/* Skip a leading skb that has no unread bytes left. */
			if (skb && !unix_skb_len(skb))
				skb = skb_peek_next(skb, &sk->sk_receive_queue);
#endif
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (skb || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* For a former listener, release the socket behind each
		 * queued skb as well.
		 */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);

		/* passed fds are erased in the kfree_skb hook */
		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	unix_schedule_gc(NULL);
}
738 
/* A pid/cred pair with one reference held on each.  It is filled by
 * prepare_peercred(), installed into a socket by init_peercred() or
 * update_peercred(), and released by drop_peercred().
 */
struct unix_peercred {
	struct pid *peer_pid;		/* thread-group pid, or NULL */
	const struct cred *peer_cred;	/* credentials, or NULL */
};
743 
744 static inline int prepare_peercred(struct unix_peercred *peercred)
745 {
746 	struct pid *pid;
747 	int err;
748 
749 	pid = task_tgid(current);
750 	err = pidfs_register_pid(pid);
751 	if (likely(!err)) {
752 		peercred->peer_pid = get_pid(pid);
753 		peercred->peer_cred = get_current_cred();
754 	}
755 	return err;
756 }
757 
758 static void drop_peercred(struct unix_peercred *peercred)
759 {
760 	const struct cred *cred = NULL;
761 	struct pid *pid = NULL;
762 
763 	might_sleep();
764 
765 	swap(peercred->peer_pid, pid);
766 	swap(peercred->peer_cred, cred);
767 
768 	put_pid(pid);
769 	put_cred(cred);
770 }
771 
/* Install the pid and cred pointers of @peercred into @sk.  The pointers
 * are copied as they are: no additional reference is taken, and no lock is
 * taken here.
 */
static inline void init_peercred(struct sock *sk,
				 const struct unix_peercred *peercred)
{
	sk->sk_peer_pid = peercred->peer_pid;
	sk->sk_peer_cred = peercred->peer_cred;
}
778 
779 static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
780 {
781 	const struct cred *old_cred;
782 	struct pid *old_pid;
783 
784 	spin_lock(&sk->sk_peer_lock);
785 	old_pid = sk->sk_peer_pid;
786 	old_cred = sk->sk_peer_cred;
787 	init_peercred(sk, peercred);
788 	spin_unlock(&sk->sk_peer_lock);
789 
790 	peercred->peer_pid = old_pid;
791 	peercred->peer_cred = old_cred;
792 }
793 
/* Give @sk its own references to the peer pid and cred of @peersk.
 *
 * The caller must hold the state lock of @peersk (asserted via lockdep);
 * @sk's fields are written under @sk's sk_peer_lock.
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	lockdep_assert_held(&unix_sk(peersk)->lock);

	spin_lock(&sk->sk_peer_lock);
	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
	spin_unlock(&sk->sk_peer_lock);
}
803 
804 static bool unix_may_passcred(const struct sock *sk)
805 {
806 	return sk->sk_scm_credentials || sk->sk_scm_pidfd;
807 }
808 
/* listen() handler: switch a bound stream/seqpacket socket to TCP_LISTEN.
 *
 * @sock:    socket to put into listening state.
 * @backlog: new value for sk_max_ack_backlog.
 *
 * Return: 0 on success; -EOPNOTSUPP for a socket type other than
 * SOCK_STREAM/SOCK_SEQPACKET; -EINVAL when the socket has no address or is
 * neither TCP_CLOSE nor TCP_LISTEN; or the error from prepare_peercred().
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct unix_peercred peercred = {};

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!READ_ONCE(u->addr))
		goto out;	/* No listens on an unbound socket */
	/* Grab the credentials before the state lock is taken. */
	err = prepare_peercred(&peercred);
	if (err)
		goto out;
	unix_state_lock(sk);
	/* err is 0 here; the jump below leaves with that value. */
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* A larger backlog may unblock waiters on peer_wait. */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	WRITE_ONCE(sk->sk_state, TCP_LISTEN);

	/* set credentials so connect can copy them */
	update_peercred(sk, &peercred);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	/* Releases either the socket's previous credentials (after the swap
	 * above) or the unused new ones.
	 */
	drop_peercred(&peercred);
out:
	return err;
}
843 
/* Forward declarations of the handlers referenced by the proto_ops tables
 * below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr_unsized *, int);
static int unix_stream_connect(struct socket *, struct sockaddr_unsized *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr_unsized *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);
873 
874 #ifdef CONFIG_PROC_FS
875 static int unix_count_nr_fds(struct sock *sk)
876 {
877 	struct sk_buff *skb;
878 	struct unix_sock *u;
879 	int nr_fds = 0;
880 
881 	spin_lock(&sk->sk_receive_queue.lock);
882 	skb = skb_peek(&sk->sk_receive_queue);
883 	while (skb) {
884 		u = unix_sk(skb->sk);
885 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
886 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
887 	}
888 	spin_unlock(&sk->sk_receive_queue.lock);
889 
890 	return nr_fds;
891 }
892 
893 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
894 {
895 	struct sock *sk = sock->sk;
896 	unsigned char s_state;
897 	struct unix_sock *u;
898 	int nr_fds = 0;
899 
900 	if (sk) {
901 		s_state = READ_ONCE(sk->sk_state);
902 		u = unix_sk(sk);
903 
904 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
905 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
906 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
907 		 */
908 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
909 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
910 		else if (s_state == TCP_LISTEN)
911 			nr_fds = unix_count_nr_fds(sk);
912 
913 		seq_printf(m, "scm_fds: %u\n", nr_fds);
914 	}
915 }
916 #else
/* !CONFIG_PROC_FS: the proto_ops tables get a NULL .show_fdinfo hook. */
#define unix_show_fdinfo NULL
918 #endif
919 
920 static bool unix_custom_sockopt(int optname)
921 {
922 	switch (optname) {
923 	case SO_INQ:
924 		return true;
925 	default:
926 		return false;
927 	}
928 }
929 
930 static int unix_setsockopt(struct socket *sock, int level, int optname,
931 			   sockptr_t optval, unsigned int optlen)
932 {
933 	struct unix_sock *u = unix_sk(sock->sk);
934 	struct sock *sk = sock->sk;
935 	int val;
936 
937 	if (level != SOL_SOCKET)
938 		return -EOPNOTSUPP;
939 
940 	if (!unix_custom_sockopt(optname))
941 		return sock_setsockopt(sock, level, optname, optval, optlen);
942 
943 	if (optlen != sizeof(int))
944 		return -EINVAL;
945 
946 	if (copy_from_sockptr(&val, optval, sizeof(val)))
947 		return -EFAULT;
948 
949 	switch (optname) {
950 	case SO_INQ:
951 		if (sk->sk_type != SOCK_STREAM)
952 			return -EINVAL;
953 
954 		if (val > 1 || val < 0)
955 			return -EINVAL;
956 
957 		WRITE_ONCE(u->recvmsg_inq, val);
958 		break;
959 	default:
960 		return -ENOPROTOOPT;
961 	}
962 
963 	return 0;
964 }
965 
/* Socket operations table using the stream variants of connect, sendmsg,
 * recvmsg and read_skb.  Of the three tables in this file it is the only
 * one that provides .setsockopt and .splice_read.
 *
 * NOTE(review): presumably installed for SOCK_STREAM sockets by the socket
 * creation path, which is outside this part of the file.
 */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	unix_setsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	sk_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
991 
/* Socket operations table using the dgram variants of connect, poll,
 * sendmsg and recvmsg.  accept and listen are wired to the generic
 * sock_no_accept/sock_no_listen handlers.
 *
 * NOTE(review): presumably installed for SOCK_DGRAM sockets by the socket
 * creation path, which is outside this part of the file.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	sk_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
1015 
/* Socket operations table that combines connection handling shared with
 * the stream table (unix_stream_connect, unix_accept, unix_listen) with
 * unix_dgram_poll and seqpacket-specific sendmsg/recvmsg.  It sets no
 * .read_skb hook.
 *
 * NOTE(review): presumably installed for SOCK_SEQPACKET sockets by the
 * socket creation path, which is outside this part of the file.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	sk_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
1038 
/*
 * ->close() hook shared by unix_dgram_proto and unix_stream_proto.
 *
 * Deliberately empty: an AF_UNIX socket has nothing to do in ->close().
 * The hook exists merely for sockmap.
 */
static void unix_close(struct sock *sk, long timeout)
{
}
1045 
1046 static bool unix_bpf_bypass_getsockopt(int level, int optname)
1047 {
1048 	if (level == SOL_SOCKET) {
1049 		switch (optname) {
1050 		case SO_PEERPIDFD:
1051 			return true;
1052 		default:
1053 			return false;
1054 		}
1055 	}
1056 
1057 	return false;
1058 }
1059 
1060 struct proto unix_dgram_proto = {
1061 	.name			= "UNIX",
1062 	.owner			= THIS_MODULE,
1063 	.obj_size		= sizeof(struct unix_sock),
1064 	.close			= unix_close,
1065 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1066 #ifdef CONFIG_BPF_SYSCALL
1067 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
1068 #endif
1069 };
1070 
1071 struct proto unix_stream_proto = {
1072 	.name			= "UNIX-STREAM",
1073 	.owner			= THIS_MODULE,
1074 	.obj_size		= sizeof(struct unix_sock),
1075 	.close			= unix_close,
1076 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
1077 #ifdef CONFIG_BPF_SYSCALL
1078 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
1079 #endif
1080 };
1081 
1082 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1083 {
1084 	struct unix_sock *u;
1085 	struct sock *sk;
1086 	int err;
1087 
1088 	atomic_long_inc(&unix_nr_socks);
1089 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
1090 		err = -ENFILE;
1091 		goto err;
1092 	}
1093 
1094 	if (type == SOCK_STREAM)
1095 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
1096 	else /*dgram and  seqpacket */
1097 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
1098 
1099 	if (!sk) {
1100 		err = -ENOMEM;
1101 		goto err;
1102 	}
1103 
1104 	sock_init_data(sock, sk);
1105 
1106 	sk->sk_scm_rights	= 1;
1107 	sk->sk_hash		= unix_unbound_hash(sk);
1108 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
1109 	sk->sk_write_space	= unix_write_space;
1110 	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
1111 	sk->sk_destruct		= unix_sock_destructor;
1112 	lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
1113 
1114 	u = unix_sk(sk);
1115 	u->listener = NULL;
1116 	u->vertex = NULL;
1117 	u->path.dentry = NULL;
1118 	u->path.mnt = NULL;
1119 	spin_lock_init(&u->lock);
1120 	lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
1121 	mutex_init(&u->iolock); /* single task reading lock */
1122 	mutex_init(&u->bindlock); /* single task binding lock */
1123 	init_waitqueue_head(&u->peer_wait);
1124 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1125 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1126 	unix_insert_unbound_socket(net, sk);
1127 
1128 	sock_prot_inuse_add(net, sk->sk_prot, 1);
1129 
1130 	return sk;
1131 
1132 err:
1133 	atomic_long_dec(&unix_nr_socks);
1134 	return ERR_PTR(err);
1135 }
1136 
1137 static int unix_create(struct net *net, struct socket *sock, int protocol,
1138 		       int kern)
1139 {
1140 	struct sock *sk;
1141 
1142 	if (protocol && protocol != PF_UNIX)
1143 		return -EPROTONOSUPPORT;
1144 
1145 	switch (sock->type) {
1146 	case SOCK_STREAM:
1147 		set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
1148 		sock->ops = &unix_stream_ops;
1149 		break;
1150 		/*
1151 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1152 		 *	nothing uses it.
1153 		 */
1154 	case SOCK_RAW:
1155 		sock->type = SOCK_DGRAM;
1156 		fallthrough;
1157 	case SOCK_DGRAM:
1158 		sock->ops = &unix_dgram_ops;
1159 		break;
1160 	case SOCK_SEQPACKET:
1161 		sock->ops = &unix_seqpacket_ops;
1162 		break;
1163 	default:
1164 		return -ESOCKTNOSUPPORT;
1165 	}
1166 
1167 	sk = unix_create1(net, sock, kern, sock->type);
1168 	if (IS_ERR(sk))
1169 		return PTR_ERR(sk);
1170 
1171 	return 0;
1172 }
1173 
1174 static int unix_release(struct socket *sock)
1175 {
1176 	struct sock *sk = sock->sk;
1177 
1178 	if (!sk)
1179 		return 0;
1180 
1181 	sk->sk_prot->close(sk, 0);
1182 	unix_release_sock(sk, 0);
1183 	sock->sk = NULL;
1184 
1185 	return 0;
1186 }
1187 
/*
 * unix_find_bsd() - find the socket bound to a filesystem pathname.
 * @sunaddr:  address whose sun_path names the socket inode
 * @addr_len: length of @sunaddr as supplied by the caller
 * @type:     socket type the found sock must have
 * @flags:    SOCK_COREDUMP selects the restricted lookup below; the value
 *            is also handed to security_unix_find()
 *
 * Returns the sock found by unix_find_socket_byinode(), or an ERR_PTR():
 * a path lookup / permission / LSM error, -ECONNREFUSED when the path is
 * not a socket inode or no sock is attached to it, -EPROTOTYPE when the
 * sock's type differs from @type.  The error paths drop the sock with
 * sock_put(), so the lookup presumably returns it with a reference held
 * that the caller inherits on success -- TODO confirm against
 * unix_find_socket_byinode().
 */
1188 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1189 				  int type, int flags)
1190 {
1191 	struct inode *inode;
1192 	struct path path;
1193 	struct sock *sk;
1194 	int err;
1195 
	/* NOTE(review): presumably NUL-terminates sun_path; return value unused here. */
1196 	unix_mkname_bsd(sunaddr, addr_len);
1197 
	/*
	 * Coredump lookups are resolved relative to init_task's filesystem
	 * root, with kernel credentials, and may neither escape that root
	 * nor traverse symlinks / magic links.  No MAY_WRITE check is made
	 * on this branch.
	 */
1198 	if (flags & SOCK_COREDUMP) {
1199 		struct path root;
1200 
1201 		task_lock(&init_task);
1202 		get_fs_root(init_task.fs, &root);
1203 		task_unlock(&init_task);
1204 
1205 		scoped_with_kernel_creds()
1206 			err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
1207 					      LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
1208 					      LOOKUP_NO_MAGICLINKS, &path);
1209 		path_put(&root);
1210 		if (err)
1211 			goto fail;
1212 	} else {
		/* Ordinary lookup: follow symlinks, require write permission. */
1213 		err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1214 		if (err)
1215 			goto fail;
1216 
1217 		err = path_permission(&path, MAY_WRITE);
1218 		if (err)
1219 			goto path_put;
1220 	}
1221 
	/* From here on 'path' is held and must be dropped on every exit. */
1222 	err = -ECONNREFUSED;
1223 	inode = d_backing_inode(path.dentry);
1224 	if (!S_ISSOCK(inode->i_mode))
1225 		goto path_put;
1226 
1227 	sk = unix_find_socket_byinode(inode);
1228 	if (!sk)
1229 		goto path_put;
1230 
1231 	err = -EPROTOTYPE;
1232 	if (sk->sk_type != type)
1233 		goto sock_put;
1234 
1235 	err = security_unix_find(&path, sk, flags);
1236 	if (err)
1237 		goto sock_put;
1238 
1239 	touch_atime(&path);
1240 
1241 	path_put(&path);
1242 
1243 	return sk;
1244 
	/* Unwind in reverse order of acquisition. */
1245 sock_put:
1246 	sock_put(sk);
1247 path_put:
1248 	path_put(&path);
1249 fail:
1250 	return ERR_PTR(err);
1251 }
1252 
/*
 * unix_find_abstract() - look up a socket by its abstract-namespace name.
 *
 * Hashes the name together with @type and searches the per-netns table
 * via unix_find_socket_byname().  Returns the sock found, or
 * ERR_PTR(-ECONNREFUSED) when nothing is bound to that name.  If the sock
 * found carries a filesystem path, its access time is touched.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct sock *sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);

	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	if (unix_sk(sk)->path.dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}
1271 
1272 static struct sock *unix_find_other(struct net *net,
1273 				    struct sockaddr_un *sunaddr,
1274 				    int addr_len, int type, int flags)
1275 {
1276 	struct sock *sk;
1277 
1278 	if (sunaddr->sun_path[0])
1279 		sk = unix_find_bsd(sunaddr, addr_len, type, flags);
1280 	else
1281 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1282 
1283 	return sk;
1284 }
1285 
/*
 * unix_autobind() - bind @sk to an automatically chosen abstract name.
 *
 * The generated name is a leading NUL byte (the buffer comes from
 * kzalloc()) followed by five lowercase hex digits, so addr->len is
 * offsetof(sun_path) + 6.  The search starts at a random 20-bit value and
 * walks the whole 20-bit space once, wrapping around.
 *
 * Returns 0 on success or when the socket already has an address, the
 * error from mutex_lock_interruptible(), -ENOMEM when the address cannot
 * be allocated, or -ENOSPC when every candidate name is taken.
 */
1286 static int unix_autobind(struct sock *sk)
1287 {
1288 	struct unix_sock *u = unix_sk(sk);
1289 	unsigned int new_hash, old_hash;
1290 	struct net *net = sock_net(sk);
1291 	struct unix_address *addr;
1292 	u32 lastnum, ordernum;
1293 	int err;
1294 
1295 	err = mutex_lock_interruptible(&u->bindlock);
1296 	if (err)
1297 		return err;
1298 
	/* Already bound: nothing to do, err is still 0 here. */
1299 	if (u->addr)
1300 		goto out;
1301 
1302 	err = -ENOMEM;
1303 	addr = kzalloc(sizeof(*addr) +
1304 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1305 	if (!addr)
1306 		goto out;
1307 
1308 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1309 	addr->name->sun_family = AF_UNIX;
1310 	refcount_set(&addr->refcnt, 1);
1311 
1312 	old_hash = sk->sk_hash;
1313 	ordernum = get_random_u32();
	/* Remember the starting point so a full wrap-around can be detected. */
1314 	lastnum = ordernum & 0xFFFFF;
1315 retry:
1316 	ordernum = (ordernum + 1) & 0xFFFFF;
	/* ordernum is masked to 20 bits, so "%05x" emits exactly five digits. */
1317 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1318 
1319 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1320 	unix_table_double_lock(net, old_hash, new_hash);
1321 
1322 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1323 		unix_table_double_unlock(net, old_hash, new_hash);
1324 
1325 		/* __unix_find_socket_byname() may take long time if many names
1326 		 * are already in use.
1327 		 */
1328 		cond_resched();
1329 
1330 		if (ordernum == lastnum) {
1331 			/* Give up if all names seems to be in use. */
1332 			err = -ENOSPC;
1333 			unix_release_addr(addr);
1334 			goto out;
1335 		}
1336 
1337 		goto retry;
1338 	}
1339 
	/* Name is free: publish it while both hash chains are still locked. */
1340 	__unix_set_addr_hash(net, sk, addr, new_hash);
1341 	unix_table_double_unlock(net, old_hash, new_hash);
1342 	err = 0;
1343 
1344 out:	mutex_unlock(&u->bindlock);
1345 	return err;
1346 }
1347 
/*
 * unix_bind_bsd() - bind @sk to a filesystem pathname.
 *
 * Creates the socket inode with vfs_mknod() first and only then takes
 * u->bindlock to attach the address; if the socket turns out to be bound
 * already (or the lock wait is interrupted) the freshly created inode is
 * unlinked again.
 *
 * The inode mode is S_IFSOCK plus the socket inode's mode bits with the
 * current umask applied.
 *
 * Returns 0 on success, -ENOMEM if the address cannot be allocated,
 * -EINVAL if the socket already has an address, -EADDRINUSE in place of
 * -EEXIST, or any other error from path creation, the LSM hook, mknod or
 * mutex_lock_interruptible().
 */
1348 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1349 			 int addr_len)
1350 {
1351 	umode_t mode = S_IFSOCK |
1352 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1353 	struct unix_sock *u = unix_sk(sk);
1354 	unsigned int new_hash, old_hash;
1355 	struct net *net = sock_net(sk);
1356 	struct mnt_idmap *idmap;
1357 	struct unix_address *addr;
1358 	struct dentry *dentry;
1359 	struct path parent;
1360 	int err;
1361 
1362 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1363 	addr = unix_create_addr(sunaddr, addr_len);
1364 	if (!addr)
1365 		return -ENOMEM;
1366 
1367 	/*
1368 	 * Get the parent directory, calculate the hash for last
1369 	 * component.
1370 	 */
1371 	dentry = start_creating_path(AT_FDCWD, addr->name->sun_path, &parent, 0);
1372 	if (IS_ERR(dentry)) {
1373 		err = PTR_ERR(dentry);
1374 		goto out;
1375 	}
1376 
1377 	/*
1378 	 * All right, let's create it.
1379 	 */
1380 	idmap = mnt_idmap(parent.mnt);
1381 	err = security_path_mknod(&parent, dentry, mode, 0);
1382 	if (!err)
1383 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0, NULL);
1384 	if (err)
1385 		goto out_path;
	/* The inode now exists; every failure below must unlink it again. */
1386 	err = mutex_lock_interruptible(&u->bindlock);
1387 	if (err)
1388 		goto out_unlink;
	/* Already bound: out_unlock sets err to -EINVAL after unlocking. */
1389 	if (u->addr)
1390 		goto out_unlock;
1391 
1392 	old_hash = sk->sk_hash;
1393 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1394 	unix_table_double_lock(net, old_hash, new_hash);
	/* The sock keeps its own references on the mount and the dentry. */
1395 	u->path.mnt = mntget(parent.mnt);
1396 	u->path.dentry = dget(dentry);
1397 	__unix_set_addr_hash(net, sk, addr, new_hash);
1398 	unix_table_double_unlock(net, old_hash, new_hash);
1399 	unix_insert_bsd_socket(sk);
1400 	mutex_unlock(&u->bindlock);
1401 	end_creating_path(&parent, dentry);
1402 	return 0;
1403 
1404 out_unlock:
1405 	mutex_unlock(&u->bindlock);
1406 	err = -EINVAL;
1407 out_unlink:
1408 	/* failed after successful mknod?  unlink what we'd created... */
1409 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1410 out_path:
1411 	end_creating_path(&parent, dentry);
1412 out:
1413 	unix_release_addr(addr);
	/* Report an existing path as "address in use". */
1414 	return err == -EEXIST ? -EADDRINUSE : err;
1415 }
1416 
1417 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1418 			      int addr_len)
1419 {
1420 	struct unix_sock *u = unix_sk(sk);
1421 	unsigned int new_hash, old_hash;
1422 	struct net *net = sock_net(sk);
1423 	struct unix_address *addr;
1424 	int err;
1425 
1426 	addr = unix_create_addr(sunaddr, addr_len);
1427 	if (!addr)
1428 		return -ENOMEM;
1429 
1430 	err = mutex_lock_interruptible(&u->bindlock);
1431 	if (err)
1432 		goto out;
1433 
1434 	if (u->addr) {
1435 		err = -EINVAL;
1436 		goto out_mutex;
1437 	}
1438 
1439 	old_hash = sk->sk_hash;
1440 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1441 	unix_table_double_lock(net, old_hash, new_hash);
1442 
1443 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1444 		goto out_spin;
1445 
1446 	__unix_set_addr_hash(net, sk, addr, new_hash);
1447 	unix_table_double_unlock(net, old_hash, new_hash);
1448 	mutex_unlock(&u->bindlock);
1449 	return 0;
1450 
1451 out_spin:
1452 	unix_table_double_unlock(net, old_hash, new_hash);
1453 	err = -EADDRINUSE;
1454 out_mutex:
1455 	mutex_unlock(&u->bindlock);
1456 out:
1457 	unix_release_addr(addr);
1458 	return err;
1459 }
1460 
1461 static int unix_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
1462 {
1463 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1464 	struct sock *sk = sock->sk;
1465 	int err;
1466 
1467 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1468 	    sunaddr->sun_family == AF_UNIX)
1469 		return unix_autobind(sk);
1470 
1471 	err = unix_validate_addr(sunaddr, addr_len);
1472 	if (err)
1473 		return err;
1474 
1475 	if (sunaddr->sun_path[0])
1476 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1477 	else
1478 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1479 
1480 	return err;
1481 }
1482 
/*
 * unix_state_double_lock() - take the state locks of two socks.
 *
 * When @sk2 is NULL or identical to @sk1 only @sk1 is locked.  Otherwise
 * both are locked, lower address first, so that two callers passing the
 * same pair in opposite order take the locks in the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first = sk1;
	struct sock *second = sk2;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 > sk2) {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock(second);
}
1496 
/*
 * unix_state_double_unlock() - counterpart of unix_state_double_lock().
 *
 * Always releases @sk1's state lock; @sk2's lock is released as well
 * unless @sk2 is NULL or the same sock as @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
1506 
/*
 * unix_dgram_connect() - ->connect() for unix_dgram_ops.
 *
 * With an AF_UNSPEC address the current peer (if any) is dropped and the
 * socket goes back to TCP_CLOSE.  Otherwise the address is validated, run
 * through the cgroup BPF connect hook, resolved with unix_find_other() and
 * installed as the new peer once the unix_may_send() and LSM checks pass.
 *
 * Returns 0 on success, -EINVAL if @alen does not even cover sa_family,
 * -EPERM if unix_may_send() refuses, or the error reported by address
 * validation, the BPF hook, autobind, the lookup or the LSM hook.
 */
1507 static int unix_dgram_connect(struct socket *sock, struct sockaddr_unsized *addr,
1508 			      int alen, int flags)
1509 {
1510 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1511 	struct sock *sk = sock->sk;
1512 	struct sock *other;
1513 	int err;
1514 
1515 	err = -EINVAL;
1516 	if (alen < offsetofend(struct sockaddr, sa_family))
1517 		goto out;
1518 
1519 	if (addr->sa_family != AF_UNSPEC) {
1520 		err = unix_validate_addr(sunaddr, alen);
1521 		if (err)
1522 			goto out;
1523 
1524 		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1525 		if (err)
1526 			goto out;
1527 
		/* Give the socket a name first if it may pass credentials. */
1528 		if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
1529 			err = unix_autobind(sk);
1530 			if (err)
1531 				goto out;
1532 		}
1533 
1534 restart:
1535 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
1536 		if (IS_ERR(other)) {
1537 			err = PTR_ERR(other);
1538 			goto out;
1539 		}
1540 
1541 		unix_state_double_lock(sk, other);
1542 
1543 		/* Apparently VFS overslept socket death. Retry. */
1544 		if (sock_flag(other, SOCK_DEAD)) {
1545 			unix_state_double_unlock(sk, other);
1546 			sock_put(other);
1547 			goto restart;
1548 		}
1549 
1550 		err = -EPERM;
1551 		if (!unix_may_send(sk, other))
1552 			goto out_unlock;
1553 
1554 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1555 		if (err)
1556 			goto out_unlock;
1557 
		/* Both ends are marked established while both state locks are held. */
1558 		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1559 		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1560 	} else {
1561 		/*
1562 		 *	1003.1g breaking connected state with AF_UNSPEC
1563 		 */
1564 		other = NULL;
		/* With other == NULL this locks sk only. */
1565 		unix_state_double_lock(sk, other);
1566 	}
1567 
1568 	/*
1569 	 * If it was connected, reconnect.
1570 	 */
1571 	if (unix_peer(sk)) {
1572 		struct sock *old_peer = unix_peer(sk);
1573 
1574 		unix_peer(sk) = other;
1575 		if (!other)
1576 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1577 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1578 
1579 		unix_state_double_unlock(sk, other);
1580 
		/*
		 * Reconnecting to the same peer needs no teardown; otherwise
		 * the old peer is told about the disconnect and falls back to
		 * TCP_CLOSE if it has no peer of its own.
		 */
1581 		if (other != old_peer) {
1582 			unix_dgram_disconnected(sk, old_peer);
1583 
1584 			unix_state_lock(old_peer);
1585 			if (!unix_peer(old_peer))
1586 				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1587 			unix_state_unlock(old_peer);
1588 		}
1589 
		/* Drop the reference that unix_peer(sk) used to hold. */
1590 		sock_put(old_peer);
1591 	} else {
		/*
		 * The reference on 'other' taken by the lookup is not dropped
		 * on success; it stays with unix_peer(sk).
		 */
1592 		unix_peer(sk) = other;
1593 		unix_state_double_unlock(sk, other);
1594 	}
1595 
1596 	return 0;
1597 
1598 out_unlock:
1599 	unix_state_double_unlock(sk, other);
1600 	sock_put(other);
1601 out:
1602 	return err;
1603 }
1604 
/*
 * unix_wait_for_peer() - wait for room in @other's receive queue.
 * @other: sock to wait on; its state lock is released in here and is
 *         NOT re-acquired (unix_stream_connect() calls this with the
 *         lock held)
 * @timeo: maximum time to sleep
 *
 * The task is queued exclusively on other's peer_wait before the sleep
 * condition is evaluated, and the state lock is only dropped afterwards.
 * It sleeps only if @other is not dead, has not shut down receiving and
 * its receive queue is still full.
 *
 * Returns the value of schedule_timeout() if the task slept, otherwise
 * @timeo unchanged.
 */
1605 static long unix_wait_for_peer(struct sock *other, long timeo)
1606 {
1607 	struct unix_sock *u = unix_sk(other);
1608 	int sched;
1609 	DEFINE_WAIT(wait);
1610 
1611 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1612 
1613 	sched = !sock_flag(other, SOCK_DEAD) &&
1614 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1615 		unix_recvq_full_lockless(other);
1616 
1617 	unix_state_unlock(other);
1618 
1619 	if (sched)
1620 		timeo = schedule_timeout(timeo);
1621 
1622 	finish_wait(&u->peer_wait, &wait);
1623 	return timeo;
1624 }
1625 
/*
 * unix_stream_connect() - ->connect() for unix_stream_ops and
 * unix_seqpacket_ops.
 *
 * Outline:
 *  1. validate the address, run the cgroup BPF connect hook, autobind if
 *     the socket may pass credentials and has no address yet;
 *  2. pre-allocate everything that can fail: peer credentials, the sock
 *     that will become the accepted end (newsk) and a one-byte skb
 *     allocated against newsk;
 *  3. look up the listener and lock it; retry the lookup if it died, wait
 *     (or fail with -EAGAIN when non-blocking) while its queue is full;
 *  4. lock sk as well, re-check that it is still TCP_CLOSE, run the LSM
 *     hook, then cross-link sk and newsk as peers and queue the skb on the
 *     listener's receive queue, where unix_accept() dequeues it.
 *
 * Lock order is listener state lock first, then sk's state lock.
 *
 * Returns 0 on success.  Errors include -ECONNREFUSED (target not
 * listening or receive-shutdown), -EAGAIN (queue full, no timeout left),
 * -EISCONN / -EINVAL (sk not in TCP_CLOSE), -ENOMEM, the signal-derived
 * errno from sock_intr_errno(), and whatever the helpers called in steps
 * 1-4 report.
 */
1626 static int unix_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
1627 			       int addr_len, int flags)
1628 {
1629 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1630 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1631 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1632 	struct unix_peercred peercred = {};
1633 	struct net *net = sock_net(sk);
1634 	struct sk_buff *skb = NULL;
1635 	unsigned char state;
1636 	long timeo;
1637 	int err;
1638 
1639 	err = unix_validate_addr(sunaddr, addr_len);
1640 	if (err)
1641 		goto out;
1642 
1643 	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1644 	if (err)
1645 		goto out;
1646 
1647 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
1648 		err = unix_autobind(sk);
1649 		if (err)
1650 			goto out;
1651 	}
1652 
1653 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1654 
1655 	err = prepare_peercred(&peercred);
1656 	if (err)
1657 		goto out;
1658 
1659 	/* create new sock for complete connection */
1660 	newsk = unix_create1(net, NULL, 0, sock->type);
1661 	if (IS_ERR(newsk)) {
1662 		err = PTR_ERR(newsk);
		/* newsk is an ERR_PTR here, so skip out_free_sk. */
1663 		goto out;
1664 	}
1665 
1666 	/* Allocate skb for sending to listening sock */
1667 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1668 	if (!skb) {
1669 		err = -ENOMEM;
1670 		goto out_free_sk;
1671 	}
1672 
1673 restart:
1674 	/*  Find listening sock. */
1675 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
1676 	if (IS_ERR(other)) {
1677 		err = PTR_ERR(other);
1678 		goto out_free_skb;
1679 	}
1680 
1681 	unix_state_lock(other);
1682 
1683 	/* Apparently VFS overslept socket death. Retry. */
1684 	if (sock_flag(other, SOCK_DEAD)) {
1685 		unix_state_unlock(other);
1686 		sock_put(other);
1687 		goto restart;
1688 	}
1689 
1690 	if (other->sk_state != TCP_LISTEN ||
1691 	    other->sk_shutdown & RCV_SHUTDOWN) {
1692 		err = -ECONNREFUSED;
1693 		goto out_unlock;
1694 	}
1695 
1696 	if (unix_recvq_full_lockless(other)) {
1697 		if (!timeo) {
1698 			err = -EAGAIN;
1699 			goto out_unlock;
1700 		}
1701 
		/* unix_wait_for_peer() releases other's state lock for us. */
1702 		timeo = unix_wait_for_peer(other, timeo);
1703 		sock_put(other);
1704 
1705 		err = sock_intr_errno(timeo);
1706 		if (signal_pending(current))
1707 			goto out_free_skb;
1708 
		/* Lock and reference are gone; look the listener up again. */
1709 		goto restart;
1710 	}
1711 
1712 	/* self connect and simultaneous connect are eliminated
1713 	 * by rejecting TCP_LISTEN socket to avoid deadlock.
1714 	 */
	/* Lockless pre-check; repeated below once sk's state lock is held. */
1715 	state = READ_ONCE(sk->sk_state);
1716 	if (unlikely(state != TCP_CLOSE)) {
1717 		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1718 		goto out_unlock;
1719 	}
1720 
1721 	unix_state_lock(sk);
1722 
1723 	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1724 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1725 		unix_state_unlock(sk);
1726 		goto out_unlock;
1727 	}
1728 
1729 	err = security_unix_stream_connect(sk, other, newsk);
1730 	if (err) {
1731 		unix_state_unlock(sk);
1732 		goto out_unlock;
1733 	}
1734 
1735 	/* The way is open! Fastly set all the necessary fields... */
1736 
	/* newsk's peer pointer owns a reference on sk. */
1737 	sock_hold(sk);
1738 	unix_peer(newsk) = sk;
1739 	newsk->sk_state = TCP_ESTABLISHED;
1740 	newsk->sk_type = sk->sk_type;
1741 	newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
1742 	init_peercred(newsk, &peercred);
1743 
1744 	newu = unix_sk(newsk);
1745 	newu->listener = other;
1746 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1747 	otheru = unix_sk(other);
1748 
1749 	/* copy address information from listening to new sock
1750 	 *
1751 	 * The contents of *(otheru->addr) and otheru->path
1752 	 * are seen fully set up here, since we have found
1753 	 * otheru in hash under its lock.  Insertion into the
1754 	 * hash chain we'd found it in had been done in an
1755 	 * earlier critical area protected by the chain's lock,
1756 	 * the same one where we'd set *(otheru->addr) contents,
1757 	 * as well as otheru->path and otheru->addr itself.
1758 	 *
1759 	 * Using smp_store_release() here to set newu->addr
1760 	 * is enough to make those stores, as well as stores
1761 	 * to newu->path visible to anyone who gets newu->addr
1762 	 * by smp_load_acquire().  IOW, the same warranties
1763 	 * as for unix_sock instances bound in unix_bind() or
1764 	 * in unix_autobind().
1765 	 */
1766 	if (otheru->path.dentry) {
1767 		path_get(&otheru->path);
1768 		newu->path = otheru->path;
1769 	}
1770 	refcount_inc(&otheru->addr->refcnt);
1771 	smp_store_release(&newu->addr, otheru->addr);
1772 
1773 	/* Set credentials */
1774 	copy_peercred(sk, other);
1775 
1776 	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
	/* sk's peer pointer owns a reference on newsk. */
1777 	sock_hold(newsk);
1778 
1779 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1780 	unix_peer(sk)	= newsk;
1781 
1782 	unix_state_unlock(sk);
1783 
1784 	/* take ten and send info to listening sock */
1785 	spin_lock(&other->sk_receive_queue.lock);
1786 	__skb_queue_tail(&other->sk_receive_queue, skb);
1787 	spin_unlock(&other->sk_receive_queue.lock);
1788 	unix_state_unlock(other);
	/* Notify the listener that a connection request is queued. */
1789 	READ_ONCE(other->sk_data_ready)(other);
1790 	sock_put(other);
1791 	return 0;
1792 
	/* Error unwinding, in reverse order of acquisition. */
1793 out_unlock:
1794 	unix_state_unlock(other);
1795 	sock_put(other);
1796 out_free_skb:
1797 	consume_skb(skb);
1798 out_free_sk:
1799 	unix_release_sock(newsk, 0);
1800 out:
1801 	drop_peercred(&peercred);
1802 	return err;
1803 }
1804 
1805 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1806 {
1807 	struct unix_peercred ska_peercred = {}, skb_peercred = {};
1808 	struct sock *ska = socka->sk, *skb = sockb->sk;
1809 	int err;
1810 
1811 	err = prepare_peercred(&ska_peercred);
1812 	if (err)
1813 		return err;
1814 
1815 	err = prepare_peercred(&skb_peercred);
1816 	if (err) {
1817 		drop_peercred(&ska_peercred);
1818 		return err;
1819 	}
1820 
1821 	/* Join our sockets back to back */
1822 	sock_hold(ska);
1823 	sock_hold(skb);
1824 	unix_peer(ska) = skb;
1825 	unix_peer(skb) = ska;
1826 	init_peercred(ska, &ska_peercred);
1827 	init_peercred(skb, &skb_peercred);
1828 
1829 	ska->sk_state = TCP_ESTABLISHED;
1830 	skb->sk_state = TCP_ESTABLISHED;
1831 
1832 	return 0;
1833 }
1834 
/*
 * unix_accept() - ->accept() for unix_stream_ops and unix_seqpacket_ops.
 * @sock:    listening socket
 * @newsock: socket that receives the accepted connection
 * @arg:     accept arguments; arg->flags selects non-blocking behaviour
 *           and arg->err carries the error code
 *
 * Dequeues one skb from the listener's receive queue (queued there by
 * unix_stream_connect()), takes the sock recorded in skb->sk and grafts it
 * onto @newsock.  Connectors sleeping on the listener's peer_wait are
 * woken because a queue slot has been freed.
 *
 * Returns 0 on success.  On failure the negative errno is stored in
 * arg->err and returned: -EOPNOTSUPP for socket types other than stream
 * or seqpacket, -EINVAL if the socket is not listening or the dequeue
 * failed without an error code, otherwise the error from
 * skb_recv_datagram().
 */
1835 static int unix_accept(struct socket *sock, struct socket *newsock,
1836 		       struct proto_accept_arg *arg)
1837 {
1838 	struct sock *sk = sock->sk;
1839 	struct sk_buff *skb;
1840 	struct sock *tsk;
1841 
1842 	arg->err = -EOPNOTSUPP;
1843 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1844 		goto out;
1845 
1846 	arg->err = -EINVAL;
1847 	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1848 		goto out;
1849 
1850 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1851 	 * so that no locks are necessary.
1852 	 */
1853 
1854 	skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1855 				&arg->err);
1856 	if (!skb) {
1857 		/* This means receive shutdown. */
1858 		if (arg->err == 0)
1859 			arg->err = -EINVAL;
1860 		goto out;
1861 	}
1862 
	/* The skb only carries the pending sock; it is freed right away. */
1863 	tsk = skb->sk;
1864 	skb_free_datagram(sk, skb);
1865 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1866 
	/* Same flag that unix_create() sets on SOCK_STREAM sockets. */
1867 	if (tsk->sk_type == SOCK_STREAM)
1868 		set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
1869 
1870 	/* attach accepted sock to socket */
1871 	unix_state_lock(tsk);
1872 	unix_update_edges(unix_sk(tsk));
1873 	sock_graft(tsk, newsock);
1874 	unix_state_unlock(tsk);
1875 	return 0;
1876 
1877 out:
1878 	return arg->err;
1879 }
1880 
1881 
1882 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1883 {
1884 	struct sock *sk = sock->sk;
1885 	struct unix_address *addr;
1886 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1887 	int err = 0;
1888 
1889 	if (peer) {
1890 		sk = unix_peer_get(sk);
1891 
1892 		err = -ENOTCONN;
1893 		if (!sk)
1894 			goto out;
1895 		err = 0;
1896 	} else {
1897 		sock_hold(sk);
1898 	}
1899 
1900 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1901 	if (!addr) {
1902 		sunaddr->sun_family = AF_UNIX;
1903 		sunaddr->sun_path[0] = 0;
1904 		err = offsetof(struct sockaddr_un, sun_path);
1905 	} else {
1906 		err = addr->len;
1907 		memcpy(sunaddr, addr->name, addr->len);
1908 
1909 		if (peer)
1910 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1911 					       CGROUP_UNIX_GETPEERNAME);
1912 		else
1913 			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1914 					       CGROUP_UNIX_GETSOCKNAME);
1915 	}
1916 	sock_put(sk);
1917 out:
1918 	return err;
1919 }
1920 
1921 /* The "user->unix_inflight" variable is protected by the garbage
1922  * collection lock, and we just read it locklessly here. If you go
1923  * over the limit, there might be a tiny race in actually noticing
1924  * it across threads. Tough.
1925  */
1926 static inline bool too_many_unix_fds(struct task_struct *p)
1927 {
1928 	struct user_struct *user = current_user();
1929 
1930 	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1931 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1932 	return false;
1933 }
1934 
1935 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1936 {
1937 	if (too_many_unix_fds(current))
1938 		return -ETOOMANYREFS;
1939 
1940 	UNIXCB(skb).fp = scm->fp;
1941 	scm->fp = NULL;
1942 
1943 	if (unix_prepare_fpl(UNIXCB(skb).fp))
1944 		return -ENOMEM;
1945 
1946 	return 0;
1947 }
1948 
1949 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1950 {
1951 	scm->fp = UNIXCB(skb).fp;
1952 	UNIXCB(skb).fp = NULL;
1953 
1954 	unix_destroy_fpl(scm->fp);
1955 }
1956 
1957 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1958 {
1959 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1960 
1961 	unix_peek_fpl(scm->fp);
1962 }
1963 
1964 static void unix_destruct_scm(struct sk_buff *skb)
1965 {
1966 	struct scm_cookie scm = {};
1967 
1968 	swap(scm.pid, UNIXCB(skb).pid);
1969 
1970 	if (UNIXCB(skb).fp)
1971 		unix_detach_fds(&scm, skb);
1972 
1973 	scm_destroy(&scm);
1974 }
1975 
/*
 * skb destructor installed by unix_scm_to_skb(): drops the SCM state
 * first, then performs the generic sock_wfree().
 */
static void unix_wfree(struct sk_buff *skb)
{
	unix_destruct_scm(skb);

	sock_wfree(skb);
}
1981 
1982 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1983 {
1984 	int err = 0;
1985 
1986 	UNIXCB(skb).pid = get_pid(scm->pid);
1987 	UNIXCB(skb).uid = scm->creds.uid;
1988 	UNIXCB(skb).gid = scm->creds.gid;
1989 	UNIXCB(skb).fp = NULL;
1990 	unix_get_secdata(scm, skb);
1991 	if (scm->fp && send_fds)
1992 		err = unix_attach_fds(scm, skb);
1993 
1994 	skb->destructor = unix_wfree;
1995 	return err;
1996 }
1997 
1998 static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
1999 {
2000 	scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2001 	unix_set_secdata(scm, skb);
2002 }
2003 
2004 /**
2005  * unix_maybe_add_creds() - Adds current task uid/gid and struct pid to skb if needed.
2006  * @skb: skb to attach creds to.
2007  * @sk: Sender sock.
2008  * @other: Receiver sock.
2009  *
2010  * Some apps rely on write() giving SCM_CREDENTIALS
2011  * We include credentials if source or destination socket
2012  * asserted SOCK_PASSCRED.
2013  *
2014  * Context: May sleep.
2015  * Return: On success zero, on error a negative error code is returned.
2016  */
2017 static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
2018 				const struct sock *other)
2019 {
2020 	if (UNIXCB(skb).pid)
2021 		return 0;
2022 
2023 	if (unix_may_passcred(sk) || unix_may_passcred(other) ||
2024 	    !other->sk_socket) {
2025 		struct pid *pid;
2026 		int err;
2027 
2028 		pid = task_tgid(current);
2029 		err = pidfs_register_pid(pid);
2030 		if (unlikely(err))
2031 			return err;
2032 
2033 		UNIXCB(skb).pid = get_pid(pid);
2034 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
2035 	}
2036 
2037 	return 0;
2038 }
2039 
2040 static bool unix_skb_scm_eq(struct sk_buff *skb,
2041 			    struct scm_cookie *scm)
2042 {
2043 	return UNIXCB(skb).pid == scm->pid &&
2044 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
2045 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
2046 	       unix_secdata_eq(scm, skb);
2047 }
2048 
2049 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
2050 {
2051 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2052 	struct unix_sock *u = unix_sk(sk);
2053 
2054 	if (unlikely(fp && fp->count)) {
2055 		atomic_add(fp->count, &u->scm_stat.nr_fds);
2056 		unix_add_edges(fp, u);
2057 	}
2058 }
2059 
2060 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
2061 {
2062 	struct scm_fp_list *fp = UNIXCB(skb).fp;
2063 	struct unix_sock *u = unix_sk(sk);
2064 
2065 	if (unlikely(fp && fp->count)) {
2066 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
2067 		unix_del_edges(fp);
2068 	}
2069 }
2070 
2071 static void unix_orphan_scm(struct sock *sk, struct sk_buff *skb)
2072 {
2073 	scm_stat_del(sk, skb);
2074 	unix_destruct_scm(skb);
2075 	skb->destructor = sock_wfree;
2076 }
2077 
2078 /*
2079  *	Send AF_UNIX data.
2080  */
2081 
2082 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
2083 			      size_t len)
2084 {
2085 	struct sock *sk = sock->sk, *other = NULL;
2086 	struct unix_sock *u = unix_sk(sk);
2087 	struct scm_cookie scm;
2088 	struct sk_buff *skb;
2089 	int data_len = 0;
2090 	int sk_locked;
2091 	long timeo;
2092 	int err;
2093 
2094 	err = scm_send(sock, msg, &scm, false);
2095 	if (err < 0)
2096 		return err;
2097 
2098 	if (msg->msg_flags & MSG_OOB) {
2099 		err = -EOPNOTSUPP;
2100 		goto out;
2101 	}
2102 
2103 	if (msg->msg_namelen) {
2104 		err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
2105 		if (err)
2106 			goto out;
2107 
2108 		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
2109 							    msg->msg_name,
2110 							    &msg->msg_namelen,
2111 							    NULL);
2112 		if (err)
2113 			goto out;
2114 	}
2115 
2116 	if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
2117 		err = unix_autobind(sk);
2118 		if (err)
2119 			goto out;
2120 	}
2121 
2122 	if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
2123 		err = -EMSGSIZE;
2124 		goto out;
2125 	}
2126 
2127 	if (len > SKB_MAX_ALLOC) {
2128 		data_len = min_t(size_t,
2129 				 len - SKB_MAX_ALLOC,
2130 				 MAX_SKB_FRAGS * PAGE_SIZE);
2131 		data_len = PAGE_ALIGN(data_len);
2132 
2133 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
2134 	}
2135 
2136 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
2137 				   msg->msg_flags & MSG_DONTWAIT, &err,
2138 				   PAGE_ALLOC_COSTLY_ORDER);
2139 	if (!skb)
2140 		goto out;
2141 
2142 	err = unix_scm_to_skb(&scm, skb, true);
2143 	if (err < 0)
2144 		goto out_free;
2145 
2146 	skb_put(skb, len - data_len);
2147 	skb->data_len = data_len;
2148 	skb->len = len;
2149 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
2150 	if (err)
2151 		goto out_free;
2152 
2153 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2154 
2155 	if (msg->msg_namelen) {
2156 lookup:
2157 		other = unix_find_other(sock_net(sk), msg->msg_name,
2158 					msg->msg_namelen, sk->sk_type, 0);
2159 		if (IS_ERR(other)) {
2160 			err = PTR_ERR(other);
2161 			goto out_free;
2162 		}
2163 	} else {
2164 		other = unix_peer_get(sk);
2165 		if (!other) {
2166 			err = -ENOTCONN;
2167 			goto out_free;
2168 		}
2169 	}
2170 
2171 	if (sk_filter(other, skb) < 0) {
2172 		/* Toss the packet but do not return any error to the sender */
2173 		err = len;
2174 		goto out_sock_put;
2175 	}
2176 
2177 	err = unix_maybe_add_creds(skb, sk, other);
2178 	if (err)
2179 		goto out_sock_put;
2180 
2181 restart:
2182 	sk_locked = 0;
2183 	unix_state_lock(other);
2184 restart_locked:
2185 
2186 	if (!unix_may_send(sk, other)) {
2187 		err = -EPERM;
2188 		goto out_unlock;
2189 	}
2190 
2191 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
2192 		/* Check with 1003.1g - what should datagram error */
2193 
2194 		unix_state_unlock(other);
2195 
2196 		if (sk->sk_type == SOCK_SEQPACKET) {
2197 			/* We are here only when racing with unix_release_sock()
2198 			 * is clearing @other. Never change state to TCP_CLOSE
2199 			 * unlike SOCK_DGRAM wants.
2200 			 */
2201 			err = -EPIPE;
2202 			goto out_sock_put;
2203 		}
2204 
2205 		if (!sk_locked)
2206 			unix_state_lock(sk);
2207 
2208 		if (unix_peer(sk) == other) {
2209 			unix_peer(sk) = NULL;
2210 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2211 
2212 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2213 			unix_state_unlock(sk);
2214 
2215 			unix_dgram_disconnected(sk, other);
2216 			sock_put(other);
2217 			err = -ECONNREFUSED;
2218 			goto out_sock_put;
2219 		}
2220 
2221 		unix_state_unlock(sk);
2222 
2223 		if (!msg->msg_namelen) {
2224 			err = -ECONNRESET;
2225 			goto out_sock_put;
2226 		}
2227 
2228 		sock_put(other);
2229 		goto lookup;
2230 	}
2231 
2232 	if (other->sk_shutdown & RCV_SHUTDOWN) {
2233 		err = -EPIPE;
2234 		goto out_unlock;
2235 	}
2236 
2237 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2238 		err = -EPERM;
2239 		goto out_unlock;
2240 	}
2241 
2242 	if (sk->sk_type != SOCK_SEQPACKET) {
2243 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2244 		if (err)
2245 			goto out_unlock;
2246 	}
2247 
2248 	/* other == sk && unix_peer(other) != sk if
2249 	 * - unix_peer(sk) == NULL, destination address bound to sk
2250 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
2251 	 */
2252 	if (other != sk &&
2253 	    unlikely(unix_peer(other) != sk &&
2254 	    unix_recvq_full_lockless(other))) {
2255 		if (timeo) {
2256 			timeo = unix_wait_for_peer(other, timeo);
2257 
2258 			err = sock_intr_errno(timeo);
2259 			if (signal_pending(current))
2260 				goto out_sock_put;
2261 
2262 			goto restart;
2263 		}
2264 
2265 		if (!sk_locked) {
2266 			unix_state_unlock(other);
2267 			unix_state_double_lock(sk, other);
2268 		}
2269 
2270 		if (unix_peer(sk) != other ||
2271 		    unix_dgram_peer_wake_me(sk, other)) {
2272 			err = -EAGAIN;
2273 			sk_locked = 1;
2274 			goto out_unlock;
2275 		}
2276 
2277 		if (!sk_locked) {
2278 			sk_locked = 1;
2279 			goto restart_locked;
2280 		}
2281 	}
2282 
2283 	if (unlikely(sk_locked))
2284 		unix_state_unlock(sk);
2285 
2286 	if (sock_flag(other, SOCK_RCVTSTAMP))
2287 		__net_timestamp(skb);
2288 
2289 	scm_stat_add(other, skb);
2290 	skb_queue_tail(&other->sk_receive_queue, skb);
2291 	unix_state_unlock(other);
2292 	READ_ONCE(other->sk_data_ready)(other);
2293 	sock_put(other);
2294 	scm_destroy(&scm);
2295 	return len;
2296 
2297 out_unlock:
2298 	if (sk_locked)
2299 		unix_state_unlock(sk);
2300 	unix_state_unlock(other);
2301 out_sock_put:
2302 	sock_put(other);
2303 out_free:
2304 	consume_skb(skb);
2305 out:
2306 	scm_destroy(&scm);
2307 	return err;
2308 }
2309 
2310 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2311  * bytes, and a minimum of a full page.
2312  */
/* unix_stream_sendmsg() caps each skb at SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ
 * bytes and passes get_order(UNIX_SKB_FRAGS_SZ) as the maximum page order
 * to sock_alloc_send_pskb().
 */
2313 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2314 
2315 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2316 static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
2317 		     struct scm_cookie *scm, bool fds_sent)
2318 {
2319 	struct unix_sock *ousk = unix_sk(other);
2320 	struct sk_buff *skb;
2321 	int err;
2322 
2323 	skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2324 
2325 	if (!skb)
2326 		return err;
2327 
2328 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2329 	if (err < 0)
2330 		goto out;
2331 
2332 	err = unix_maybe_add_creds(skb, sk, other);
2333 	if (err)
2334 		goto out;
2335 
2336 	skb_put(skb, 1);
2337 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2338 
2339 	if (err)
2340 		goto out;
2341 
2342 	unix_state_lock(other);
2343 
2344 	if (sock_flag(other, SOCK_DEAD) ||
2345 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2346 		err = -EPIPE;
2347 		goto out_unlock;
2348 	}
2349 
2350 	if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2351 		err = -EPERM;
2352 		goto out_unlock;
2353 	}
2354 
2355 	scm_stat_add(other, skb);
2356 
2357 	spin_lock(&other->sk_receive_queue.lock);
2358 	WRITE_ONCE(ousk->oob_skb, skb);
2359 	WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
2360 	__skb_queue_tail(&other->sk_receive_queue, skb);
2361 	spin_unlock(&other->sk_receive_queue.lock);
2362 
2363 	sk_send_sigurg(other);
2364 	unix_state_unlock(other);
2365 	READ_ONCE(other->sk_data_ready)(other);
2366 
2367 	return 0;
2368 out_unlock:
2369 	unix_state_unlock(other);
2370 out:
2371 	consume_skb(skb);
2372 	return err;
2373 }
2374 #endif
2375 
/*
 * sendmsg() handler for stream sockets.
 *
 * Splits the payload of @msg into skbs, attaches SCM data (fds only on the
 * first skb) and credentials, and appends each skb to the receive queue of
 * the connected peer.  With MSG_OOB and CONFIG_AF_UNIX_OOB, the final byte
 * is sent separately through queue_oob().
 *
 * Returns the number of bytes queued if at least one byte was sent;
 * otherwise a negative errno: -ENOTCONN without a peer, -EISCONN or
 * -EOPNOTSUPP when a destination address is supplied, -EPIPE when this
 * socket has SEND_SHUTDOWN set or the peer is dead / has RCV_SHUTDOWN set,
 * -EPERM when fds are attached but the peer has sk_scm_rights clear, or the
 * error from a failed allocation/copy.  SIGPIPE is raised on the -EPIPE
 * path only when nothing was sent and MSG_NOSIGNAL is not set.
 */
2376 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2377 			       size_t len)
2378 {
2379 	struct sock *sk = sock->sk;
2380 	struct sk_buff *skb = NULL;
2381 	struct sock *other = NULL;
2382 	struct unix_sock *otheru;
2383 	struct scm_cookie scm;
2384 	bool fds_sent = false;
2385 	int err, sent = 0;
2386 
2387 	err = scm_send(sock, msg, &scm, false);
2388 	if (err < 0)
2389 		return err;
2390 
	/* With OOB support the last byte is held back for queue_oob(); a
	 * zero-length MSG_OOB send, or any MSG_OOB send without OOB support,
	 * fails with -EOPNOTSUPP.
	 */
2391 	if (msg->msg_flags & MSG_OOB) {
2392 		err = -EOPNOTSUPP;
2393 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2394 		if (len)
2395 			len--;
2396 		else
2397 #endif
2398 			goto out_err;
2399 	}
2400 
	/* A destination address is never accepted on this path. */
2401 	if (msg->msg_namelen) {
2402 		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2403 		goto out_err;
2404 	}
2405 
2406 	other = unix_peer(sk);
2407 	if (!other) {
2408 		err = -ENOTCONN;
2409 		goto out_err;
2410 	}
2411 
2412 	otheru = unix_sk(other);
2413 
2414 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2415 		goto out_pipe;
2416 
	/* One skb is built and queued per iteration. */
2417 	while (sent < len) {
2418 		int size = len - sent;
2419 		int data_len;
2420 
2421 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			/* Zero-length skb; the data is spliced in further down. */
2422 			skb = sock_alloc_send_pskb(sk, 0, 0,
2423 						   msg->msg_flags & MSG_DONTWAIT,
2424 						   &err, 0);
2425 		} else {
2426 			/* Keep two messages in the pipe so it schedules better */
2427 			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2428 
2429 			/* allow fallback to order-0 allocations */
2430 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2431 
			/* Whatever exceeds SKB_MAX_HEAD(0) goes into the paged
			 * part: page-aligned, but never more than size.
			 */
2432 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2433 
2434 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2435 
2436 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2437 						   msg->msg_flags & MSG_DONTWAIT, &err,
2438 						   get_order(UNIX_SKB_FRAGS_SZ));
2439 		}
2440 		if (!skb)
2441 			goto out_err;
2442 
2443 		/* Only send the fds in the first buffer */
2444 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2445 		if (err < 0)
2446 			goto out_free;
2447 
2448 		fds_sent = true;
2449 
2450 		err = unix_maybe_add_creds(skb, sk, other);
2451 		if (err)
2452 			goto out_free;
2453 
2454 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2455 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2456 			err = skb_splice_from_iter(skb, &msg->msg_iter, size);
2457 			if (err < 0)
2458 				goto out_free;
2459 
			/* The splice may take fewer bytes than requested; add
			 * what it took to the sender's sk_wmem_alloc.
			 */
2460 			size = err;
2461 			refcount_add(size, &sk->sk_wmem_alloc);
2462 		} else {
2463 			skb_put(skb, size - data_len);
2464 			skb->data_len = data_len;
2465 			skb->len = size;
2466 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2467 			if (err)
2468 				goto out_free;
2469 		}
2470 
		/* Peer state is checked under its state lock before queueing. */
2471 		unix_state_lock(other);
2472 
2473 		if (sock_flag(other, SOCK_DEAD) ||
2474 		    (other->sk_shutdown & RCV_SHUTDOWN))
2475 			goto out_pipe_unlock;
2476 
2477 		if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2478 			unix_state_unlock(other);
2479 			err = -EPERM;
2480 			goto out_free;
2481 		}
2482 
2483 		scm_stat_add(other, skb);
2484 
		/* inq_len and the queue are updated together under the queue lock. */
2485 		spin_lock(&other->sk_receive_queue.lock);
2486 		WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
2487 		__skb_queue_tail(&other->sk_receive_queue, skb);
2488 		spin_unlock(&other->sk_receive_queue.lock);
2489 
2490 		unix_state_unlock(other);
2491 		READ_ONCE(other->sk_data_ready)(other);
2492 		sent += size;
2493 	}
2494 
2495 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	/* Send the byte held back above as out-of-band data. */
2496 	if (msg->msg_flags & MSG_OOB) {
2497 		err = queue_oob(sk, msg, other, &scm, fds_sent);
2498 		if (err)
2499 			goto out_err;
2500 		sent++;
2501 	}
2502 #endif
2503 
2504 	scm_destroy(&scm);
2505 
2506 	return sent;
2507 
2508 out_pipe_unlock:
2509 	unix_state_unlock(other);
2510 out_pipe:
2511 	if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
2512 		send_sig(SIGPIPE, current, 0);
2513 	err = -EPIPE;
2514 out_free:
	/* skb is still NULL when out_pipe is taken before the first allocation. */
2515 	consume_skb(skb);
2516 out_err:
2517 	scm_destroy(&scm);
	/* Partial progress is reported in preference to the error. */
2518 	return sent ? : err;
2519 }
2520 
2521 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2522 				  size_t len)
2523 {
2524 	int err;
2525 	struct sock *sk = sock->sk;
2526 
2527 	err = sock_error(sk);
2528 	if (err)
2529 		return err;
2530 
2531 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2532 		return -ENOTCONN;
2533 
2534 	if (msg->msg_namelen)
2535 		msg->msg_namelen = 0;
2536 
2537 	return unix_dgram_sendmsg(sock, msg, len);
2538 }
2539 
2540 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2541 				  size_t size, int flags)
2542 {
2543 	struct sock *sk = sock->sk;
2544 
2545 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2546 		return -ENOTCONN;
2547 
2548 	return unix_dgram_recvmsg(sock, msg, size, flags);
2549 }
2550 
2551 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2552 {
2553 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2554 
2555 	if (addr) {
2556 		msg->msg_namelen = addr->len;
2557 		memcpy(msg->msg_name, addr->name, addr->len);
2558 	}
2559 }
2560 
/*
 * Receive one datagram from @sk's receive queue into @msg.
 *
 * MSG_OOB is rejected with -EOPNOTSUPP.  The wait budget comes from
 * sock_rcvtimeo(sk, flags & MSG_DONTWAIT).  When @msg->msg_name is set the
 * address of the skb's owning socket is copied in.  At most @size bytes are
 * copied; MSG_TRUNC is set in @msg->msg_flags when the datagram (past the
 * peek offset) is longer than @size.  SCM data is passed on through
 * scm_recv_unix(); without MSG_PEEK attached fds are detached from the skb,
 * with MSG_PEEK they are handed to unix_peek_fds().
 *
 * Returns the number of bytes copied, or the datagram length past the peek
 * offset when MSG_TRUNC is set in @flags; a negative errno on failure; 0
 * for a SOCK_SEQPACKET socket with RCV_SHUTDOWN set whose receive attempt
 * ended in -EAGAIN.
 */
2561 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2562 			 int flags)
2563 {
2564 	struct scm_cookie scm;
2565 	struct socket *sock = sk->sk_socket;
2566 	struct unix_sock *u = unix_sk(sk);
2567 	struct sk_buff *skb, *last;
2568 	long timeo;
2569 	int skip;
2570 	int err;
2571 
2572 	err = -EOPNOTSUPP;
2573 	if (flags&MSG_OOB)
2574 		goto out;
2575 
2576 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2577 
	/* The loop is left with u->iolock held only when an skb was obtained. */
2578 	do {
2579 		mutex_lock(&u->iolock);
2580 
2581 		skip = sk_peek_offset(sk, flags);
2582 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2583 					      &skip, &err, &last);
2584 		if (skb) {
2585 			if (!(flags & MSG_PEEK))
2586 				scm_stat_del(sk, skb);
2587 			break;
2588 		}
2589 
2590 		mutex_unlock(&u->iolock);
2591 
2592 		if (err != -EAGAIN)
2593 			break;
2594 	} while (timeo &&
2595 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2596 					      &err, &timeo, last));
2597 
2598 	if (!skb) { /* implies iolock unlocked */
2599 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2600 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2601 		    (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
2602 			err = 0;
2603 		goto out;
2604 	}
2605 
	/* Wake anyone sleeping on u->peer_wait with write-ready events. */
2606 	if (wq_has_sleeper(&u->peer_wait))
2607 		wake_up_interruptible_sync_poll(&u->peer_wait,
2608 						EPOLLOUT | EPOLLWRNORM |
2609 						EPOLLWRBAND);
2610 
2611 	if (msg->msg_name) {
2612 		unix_copy_addr(msg, skb->sk);
2613 
2614 		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2615 						      msg->msg_name,
2616 						      &msg->msg_namelen);
2617 	}
2618 
	/* Clamp to what is left of the datagram past the peek offset, or
	 * flag truncation when the caller's buffer is smaller than that.
	 */
2619 	if (size > skb->len - skip)
2620 		size = skb->len - skip;
2621 	else if (size < skb->len - skip)
2622 		msg->msg_flags |= MSG_TRUNC;
2623 
2624 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2625 	if (err)
2626 		goto out_free;
2627 
2628 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2629 		__sock_recv_timestamp(msg, sk, skb);
2630 
2631 	memset(&scm, 0, sizeof(scm));
2632 
2633 	unix_skb_to_scm(skb, &scm);
2634 
2635 	if (!(flags & MSG_PEEK)) {
2636 		if (UNIXCB(skb).fp)
2637 			unix_detach_fds(&scm, skb);
2638 
2639 		sk_peek_offset_bwd(sk, skb->len);
2640 	} else {
2641 		/* It is questionable: on PEEK we could:
2642 		   - do not return fds - good, but too simple 8)
2643 		   - return fds, and do not return them on read (old strategy,
2644 		     apparently wrong)
2645 		   - clone fds (I chose it for now, it is the most universal
2646 		     solution)
2647 
2648 		   POSIX 1003.1g does not actually define this clearly
2649 		   at all. POSIX 1003.1g doesn't define a lot of things
2650 		   clearly however!
2651 
2652 		*/
2653 
2654 		sk_peek_offset_fwd(sk, size);
2655 
2656 		if (UNIXCB(skb).fp)
2657 			unix_peek_fds(&scm, skb);
2658 	}
	/* From here on err carries the byte count returned to the caller. */
2659 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2660 
2661 	scm_recv_unix(sock, msg, &scm, flags);
2662 
2663 out_free:
2664 	skb_free_datagram(sk, skb);
2665 	mutex_unlock(&u->iolock);
2666 out:
2667 	return err;
2668 }
2669 
2670 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2671 			      int flags)
2672 {
2673 	struct sock *sk = sock->sk;
2674 
2675 #ifdef CONFIG_BPF_SYSCALL
2676 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2677 
2678 	if (prot != &unix_dgram_proto)
2679 		return prot->recvmsg(sk, msg, size, flags);
2680 #endif
2681 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2682 }
2683 
2684 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2685 {
2686 	struct unix_sock *u = unix_sk(sk);
2687 	struct sk_buff *skb;
2688 	int err;
2689 
2690 	mutex_lock(&u->iolock);
2691 
2692 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2693 	if (!skb) {
2694 		mutex_unlock(&u->iolock);
2695 		return err;
2696 	}
2697 
2698 	unix_orphan_scm(sk, skb);
2699 
2700 	mutex_unlock(&u->iolock);
2701 
2702 	return recv_actor(sk, skb);
2703 }
2704 
2705 /*
2706  *	Sleep until more data has arrived. But check for races..
2707  */
/*
 * Sleeps on sk_sleep(sk) until the tail of the receive queue differs from
 * @last, sk->sk_err or RCV_SHUTDOWN is set, a signal is pending, or @timeo
 * reaches zero; the loop also ends if the socket is found SOCK_DEAD after a
 * sleep.  @freezable adds TASK_FREEZABLE to the sleep state.  The conditions
 * are evaluated under the socket's state lock, which is dropped only around
 * schedule_timeout().
 *
 * Returns the remaining timeout.
 */
2708 static long unix_stream_data_wait(struct sock *sk, long timeo,
2709 				  struct sk_buff *last, bool freezable)
2710 {
2711 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2712 	struct sk_buff *tail;
2713 	DEFINE_WAIT(wait);
2714 
2715 	unix_state_lock(sk);
2716 
2717 	for (;;) {
		/* Register on the wait queue before testing the conditions. */
2718 		prepare_to_wait(sk_sleep(sk), &wait, state);
2719 
2720 		tail = skb_peek_tail(&sk->sk_receive_queue);
2721 		if (tail != last ||
2722 		    sk->sk_err ||
2723 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2724 		    signal_pending(current) ||
2725 		    !timeo)
2726 			break;
2727 
		/* SOCKWQ_ASYNC_WAITDATA is set before sleeping and cleared
		 * after waking, unless the socket died in the meantime.
		 */
2728 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2729 		unix_state_unlock(sk);
2730 		timeo = schedule_timeout(timeo);
2731 		unix_state_lock(sk);
2732 
2733 		if (sock_flag(sk, SOCK_DEAD))
2734 			break;
2735 
2736 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2737 	}
2738 
2739 	finish_wait(sk_sleep(sk), &wait);
2740 	unix_state_unlock(sk);
2741 	return timeo;
2742 }
2743 
/* Per-call parameters for unix_stream_read_generic() and its recv_actor. */
2744 struct unix_stream_read_state {
	/* Called as recv_actor(skb, skip, chunk, state); returns the number
	 * of bytes handled, or a negative value on failure.
	 */
2745 	int (*recv_actor)(struct sk_buff *, int, int,
2746 			  struct unix_stream_read_state *);
2747 	struct socket *socket;	/* socket being read */
2748 	struct msghdr *msg;	/* destination message; left unset by the splice path */
2749 	struct pipe_inode_info *pipe;	/* destination pipe, used by the splice actor */
2750 	size_t size;	/* number of bytes requested */
2751 	int flags;	/* MSG_* receive flags */
2752 	unsigned int splice_flags;	/* flags passed on to skb_splice_bits() */
2753 };
2754 
2755 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
/*
 * Handle a MSG_OOB receive: deliver the byte held in u->oob_skb through
 * @state->recv_actor.
 *
 * Fails with -EINVAL when SOCK_URGINLINE is set or no OOB skb is pending.
 * Without MSG_PEEK the OOB marker is cleared, inq_len is decremented, the
 * OOB skb's consumed count is advanced by one, and a fully consumed skb
 * sitting directly in front of the OOB skb is unlinked and freed.
 *
 * Returns 1 (with MSG_OOB set in the message flags) on success, -EFAULT if
 * the actor reports a failure.
 */
2756 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2757 {
2758 	struct sk_buff *oob_skb, *read_skb = NULL;
2759 	struct socket *sock = state->socket;
2760 	struct sock *sk = sock->sk;
2761 	struct unix_sock *u = unix_sk(sk);
2762 	int chunk = 1;
2763 
	/* Lock order: iolock, then state lock, then receive-queue lock. */
2764 	mutex_lock(&u->iolock);
2765 	unix_state_lock(sk);
2766 	spin_lock(&sk->sk_receive_queue.lock);
2767 
2768 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2769 		spin_unlock(&sk->sk_receive_queue.lock);
2770 		unix_state_unlock(sk);
2771 		mutex_unlock(&u->iolock);
2772 		return -EINVAL;
2773 	}
2774 
2775 	oob_skb = u->oob_skb;
2776 
2777 	if (!(state->flags & MSG_PEEK)) {
2778 		WRITE_ONCE(u->oob_skb, NULL);
2779 		WRITE_ONCE(u->inq_len, u->inq_len - 1);
2780 
		/* A fully consumed skb right before the OOB skb is removed too. */
2781 		if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
2782 		    !unix_skb_len(oob_skb->prev)) {
2783 			read_skb = oob_skb->prev;
2784 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2785 		}
2786 	}
2787 
2788 	spin_unlock(&sk->sk_receive_queue.lock);
2789 	unix_state_unlock(sk);
2790 
	/* The copy runs with only u->iolock held. */
2791 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2792 
2793 	if (!(state->flags & MSG_PEEK))
2794 		UNIXCB(oob_skb).consumed += 1;
2795 
2796 	mutex_unlock(&u->iolock);
2797 
2798 	consume_skb(read_skb);
2799 
2800 	if (chunk < 0)
2801 		return -EFAULT;
2802 
2803 	state->msg->msg_flags |= MSG_OOB;
2804 	return 1;
2805 }
2806 
/*
 * Decide which skb unix_stream_read_generic() should read next, given the
 * candidate @skb, the receive @flags and the number of bytes already
 * @copied in this call.
 *
 * Fast path: an skb with unread data that is not the OOB skb is returned
 * unchanged.  Otherwise, under the receive-queue lock:
 *  - a fully consumed skb (unix_skb_len() == 0) ends the read (NULL) when
 *    data was already copied and it either is the OOB skb or no OOB skb is
 *    pending; otherwise it is stepped over, and also unlinked and freed
 *    unless MSG_PEEK is set;
 *  - on reaching the OOB skb: return NULL if data was already copied;
 *    otherwise, without MSG_PEEK, clear u->oob_skb and, unless
 *    SOCK_URGINLINE is set, unlink and drop the OOB skb and continue from
 *    the queue head; with MSG_PEEK and no SOCK_URGINLINE, skip past it.
 *
 * Returns the skb to read from, or NULL.
 */
2807 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2808 				  int flags, int copied)
2809 {
2810 	struct sk_buff *read_skb = NULL, *unread_skb = NULL;
2811 	struct unix_sock *u = unix_sk(sk);
2812 
2813 	if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
2814 		return skb;
2815 
2816 	spin_lock(&sk->sk_receive_queue.lock);
2817 
	/* Step 1: deal with an skb that has no unread bytes left. */
2818 	if (!unix_skb_len(skb)) {
2819 		if (copied && (!u->oob_skb || skb == u->oob_skb)) {
2820 			skb = NULL;
2821 		} else if (flags & MSG_PEEK) {
2822 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2823 		} else {
2824 			read_skb = skb;
2825 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2826 			__skb_unlink(read_skb, &sk->sk_receive_queue);
2827 		}
2828 
2829 		if (!skb)
2830 			goto unlock;
2831 	}
2832 
	/* Step 2: only the OOB skb needs further handling. */
2833 	if (skb != u->oob_skb)
2834 		goto unlock;
2835 
2836 	if (copied) {
2837 		skb = NULL;
2838 	} else if (!(flags & MSG_PEEK)) {
2839 		WRITE_ONCE(u->oob_skb, NULL);
2840 
2841 		if (!sock_flag(sk, SOCK_URGINLINE)) {
2842 			__skb_unlink(skb, &sk->sk_receive_queue);
2843 			unread_skb = skb;
2844 			skb = skb_peek(&sk->sk_receive_queue);
2845 		}
2846 	} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2847 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
2848 	}
2849 
2850 unlock:
2851 	spin_unlock(&sk->sk_receive_queue.lock);
2852 
	/* Unlinked skbs are freed only after the queue lock is released. */
2853 	consume_skb(read_skb);
2854 	kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2855 
2856 	return skb;
2857 }
2858 #endif
2859 
2860 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2861 {
2862 	struct sk_buff_head *queue = &sk->sk_receive_queue;
2863 	struct unix_sock *u = unix_sk(sk);
2864 	struct sk_buff *skb;
2865 	int err;
2866 
2867 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2868 		return -ENOTCONN;
2869 
2870 	err = sock_error(sk);
2871 	if (err)
2872 		return err;
2873 
2874 	mutex_lock(&u->iolock);
2875 	spin_lock(&queue->lock);
2876 
2877 	skb = __skb_dequeue(queue);
2878 	if (!skb) {
2879 		spin_unlock(&queue->lock);
2880 		mutex_unlock(&u->iolock);
2881 		return -EAGAIN;
2882 	}
2883 
2884 	WRITE_ONCE(u->inq_len, u->inq_len - unix_skb_len(skb));
2885 
2886 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2887 	if (skb == u->oob_skb) {
2888 		WRITE_ONCE(u->oob_skb, NULL);
2889 		spin_unlock(&queue->lock);
2890 		mutex_unlock(&u->iolock);
2891 
2892 		kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2893 		return -EAGAIN;
2894 	}
2895 #endif
2896 
2897 	spin_unlock(&queue->lock);
2898 
2899 	unix_orphan_scm(sk, skb);
2900 
2901 	mutex_unlock(&u->iolock);
2902 
2903 	return recv_actor(sk, skb);
2904 }
2905 
/*
 * Common receive loop for stream sockets, shared by the recvmsg and splice
 * entry points.
 *
 * Walks the receive queue of @state->socket under u->iolock and hands each
 * chunk to @state->recv_actor until @state->size bytes are consumed, the
 * queue is exhausted with at least "target" bytes copied, or an error,
 * shutdown, credential change or fd-carrying skb ends the read.  When the
 * queue is empty and more data is required it sleeps in
 * unix_stream_data_wait() (to which @freezable is forwarded) and starts
 * over.  MSG_OOB requests are handled by unix_stream_recv_urg() when
 * CONFIG_AF_UNIX_OOB is enabled and fail with -EOPNOTSUPP otherwise.
 *
 * Returns "copied" when it is non-zero (a byte count, or -EFAULT when the
 * first actor call failed), otherwise "err" (0 or a negative errno, e.g.
 * -EINVAL when the socket is not TCP_ESTABLISHED, -ECONNRESET when it is
 * dead, -EAGAIN when no data is available and the timeout is zero).
 */
2906 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2907 				    bool freezable)
2908 {
2909 	int noblock = state->flags & MSG_DONTWAIT;
2910 	struct socket *sock = state->socket;
2911 	struct msghdr *msg = state->msg;
2912 	struct sock *sk = sock->sk;
2913 	size_t size = state->size;
2914 	int flags = state->flags;
2915 	bool check_creds = false;
2916 	struct scm_cookie scm;
2917 	struct unix_sock *u;
2918 	int copied = 0;
2919 	int err = 0;
2920 	long timeo;
2921 	int target;
2922 	int skip;
2923 
2924 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2925 		err = -EINVAL;
2926 		goto out;
2927 	}
2928 
2929 	if (unlikely(flags & MSG_OOB)) {
2930 		err = -EOPNOTSUPP;
2931 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2932 		err = unix_stream_recv_urg(state);
2933 #endif
2934 		goto out;
2935 	}
2936 
2937 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2938 	timeo = sock_rcvtimeo(sk, noblock);
2939 
2940 	memset(&scm, 0, sizeof(scm));
2941 
2942 	u = unix_sk(sk);
2943 
2944 redo:
2945 	/* Lock the socket to prevent queue disordering
2946 	 * while sleeps in memcpy_tomsg
2947 	 */
2948 	mutex_lock(&u->iolock);
2949 
	/* Bytes to skip at the front of the queue, from the peek offset. */
2950 	skip = max(sk_peek_offset(sk, flags), 0);
2951 
2952 	do {
2953 		struct sk_buff *skb, *last;
2954 		int chunk;
2955 
2956 		unix_state_lock(sk);
2957 		if (sock_flag(sk, SOCK_DEAD)) {
2958 			err = -ECONNRESET;
2959 			goto unlock;
2960 		}
2961 		last = skb = skb_peek(&sk->sk_receive_queue);
2962 
		/* "again" is entered with the state lock held. */
2963 again:
2964 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2965 		if (skb) {
2966 			skb = manage_oob(skb, sk, flags, copied);
			/* manage_oob() ended the read after some data was copied. */
2967 			if (!skb && copied) {
2968 				unix_state_unlock(sk);
2969 				break;
2970 			}
2971 		}
2972 #endif
2973 		if (skb == NULL) {
2974 			if (copied >= target)
2975 				goto unlock;
2976 
2977 			/*
2978 			 *	POSIX 1003.1g mandates this order.
2979 			 */
2980 
2981 			err = sock_error(sk);
2982 			if (err)
2983 				goto unlock;
2984 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2985 				goto unlock;
2986 
2987 			unix_state_unlock(sk);
2988 			if (!timeo) {
2989 				err = -EAGAIN;
2990 				break;
2991 			}
2992 
			/* Drop iolock while sleeping, then start over at "redo". */
2993 			mutex_unlock(&u->iolock);
2994 
2995 			timeo = unix_stream_data_wait(sk, timeo, last, freezable);
2996 
2997 			if (signal_pending(current)) {
2998 				err = sock_intr_errno(timeo);
2999 				scm_destroy(&scm);
3000 				goto out;
3001 			}
3002 
3003 			goto redo;
			/* Shared exit: drop the state lock and leave the loop. */
3004 unlock:
3005 			unix_state_unlock(sk);
3006 			break;
3007 		}
3008 
		/* Step past skbs fully covered by the remaining skip count. */
3009 		while (skip >= unix_skb_len(skb)) {
3010 			skip -= unix_skb_len(skb);
3011 			last = skb;
3012 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3013 			if (!skb)
3014 				goto again;
3015 		}
3016 
3017 		unix_state_unlock(sk);
3018 
3019 		if (check_creds) {
3020 			/* Never glue messages from different writers */
3021 			if (!unix_skb_scm_eq(skb, &scm))
3022 				break;
3023 		} else if (unix_may_passcred(sk)) {
3024 			/* Copy credentials */
3025 			unix_skb_to_scm(skb, &scm);
3026 			check_creds = true;
3027 		}
3028 
3029 		/* Copy address just once */
3030 		if (msg && msg->msg_name) {
3031 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
3032 
3033 			unix_copy_addr(msg, skb->sk);
3034 			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
3035 							      &msg->msg_namelen);
3036 
			/* NOTE(review): sunaddr is not read after this store. */
3037 			sunaddr = NULL;
3038 		}
3039 
3040 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
3041 		chunk = state->recv_actor(skb, skip, chunk, state);
3042 		if (chunk < 0) {
			/* Report -EFAULT only if nothing was copied before. */
3043 			if (copied == 0)
3044 				copied = -EFAULT;
3045 			break;
3046 		}
3047 		copied += chunk;
3048 		size -= chunk;
3049 
3050 		/* Mark read part of skb as used */
3051 		if (!(flags & MSG_PEEK)) {
3052 			UNIXCB(skb).consumed += chunk;
3053 
3054 			sk_peek_offset_bwd(sk, chunk);
3055 
3056 			if (UNIXCB(skb).fp) {
3057 				scm_stat_del(sk, skb);
3058 				unix_detach_fds(&scm, skb);
3059 			}
3060 
3061 			spin_lock(&sk->sk_receive_queue.lock);
3062 			WRITE_ONCE(u->inq_len, u->inq_len - chunk);
			/* Data left in this skb: keep it queued and stop. */
3063 			if (unix_skb_len(skb)) {
3064 				spin_unlock(&sk->sk_receive_queue.lock);
3065 				break;
3066 			}
3067 			__skb_unlink(skb, &sk->sk_receive_queue);
3068 			spin_unlock(&sk->sk_receive_queue.lock);
3069 
3070 			consume_skb(skb);
3071 
			/* Stop once fds have been collected in scm. */
3072 			if (scm.fp)
3073 				break;
3074 		} else {
3075 			/* It is questionable, see note in unix_dgram_recvmsg.
3076 			 */
3077 			if (UNIXCB(skb).fp)
3078 				unix_peek_fds(&scm, skb);
3079 
3080 			sk_peek_offset_fwd(sk, chunk);
3081 
3082 			if (UNIXCB(skb).fp)
3083 				break;
3084 
			/* Peek on into the next skb, if there is one. */
3085 			skip = 0;
3086 			last = skb;
3087 			unix_state_lock(sk);
3088 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
3089 			if (skb)
3090 				goto again;
3091 			unix_state_unlock(sk);
3092 			break;
3093 		}
3094 	} while (size);
3095 
3096 	mutex_unlock(&u->iolock);
3097 	if (msg) {
3098 		bool do_cmsg = READ_ONCE(u->recvmsg_inq);
3099 
3100 		scm_recv_unix(sock, msg, &scm, flags);
3101 
		/* On a non-negative result, report u->inq_len in msg_inq and,
		 * if u->recvmsg_inq is set, also as an SCM_INQ control message.
		 */
3102 		if ((do_cmsg | msg->msg_get_inq) && (copied ?: err) >= 0) {
3103 			msg->msg_inq = READ_ONCE(u->inq_len);
3104 			if (do_cmsg)
3105 				put_cmsg(msg, SOL_SOCKET, SCM_INQ,
3106 					 sizeof(msg->msg_inq), &msg->msg_inq);
3107 		}
3108 	} else {
3109 		scm_destroy(&scm);
3110 	}
3111 out:
3112 	return copied ? : err;
3113 }
3114 
3115 static int unix_stream_read_actor(struct sk_buff *skb,
3116 				  int skip, int chunk,
3117 				  struct unix_stream_read_state *state)
3118 {
3119 	int ret;
3120 
3121 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
3122 				    state->msg, chunk);
3123 	return ret ?: chunk;
3124 }
3125 
3126 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
3127 			  size_t size, int flags)
3128 {
3129 	struct unix_stream_read_state state = {
3130 		.recv_actor = unix_stream_read_actor,
3131 		.socket = sk->sk_socket,
3132 		.msg = msg,
3133 		.size = size,
3134 		.flags = flags
3135 	};
3136 
3137 	return unix_stream_read_generic(&state, true);
3138 }
3139 
3140 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
3141 			       size_t size, int flags)
3142 {
3143 	struct unix_stream_read_state state = {
3144 		.recv_actor = unix_stream_read_actor,
3145 		.socket = sock,
3146 		.msg = msg,
3147 		.size = size,
3148 		.flags = flags
3149 	};
3150 
3151 #ifdef CONFIG_BPF_SYSCALL
3152 	struct sock *sk = sock->sk;
3153 	const struct proto *prot = READ_ONCE(sk->sk_prot);
3154 
3155 	if (prot != &unix_stream_proto)
3156 		return prot->recvmsg(sk, msg, size, flags);
3157 #endif
3158 	return unix_stream_read_generic(&state, true);
3159 }
3160 
3161 static int unix_stream_splice_actor(struct sk_buff *skb,
3162 				    int skip, int chunk,
3163 				    struct unix_stream_read_state *state)
3164 {
3165 	return skb_splice_bits(skb, state->socket->sk,
3166 			       UNIXCB(skb).consumed + skip,
3167 			       state->pipe, chunk, state->splice_flags);
3168 }
3169 
3170 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
3171 				       struct pipe_inode_info *pipe,
3172 				       size_t size, unsigned int flags)
3173 {
3174 	struct unix_stream_read_state state = {
3175 		.recv_actor = unix_stream_splice_actor,
3176 		.socket = sock,
3177 		.pipe = pipe,
3178 		.size = size,
3179 		.splice_flags = flags,
3180 	};
3181 
3182 	if (unlikely(*ppos))
3183 		return -ESPIPE;
3184 
3185 	if (sock->file->f_flags & O_NONBLOCK ||
3186 	    flags & SPLICE_F_NONBLOCK)
3187 		state.flags = MSG_DONTWAIT;
3188 
3189 	return unix_stream_read_generic(&state, false);
3190 }
3191 
3192 static int unix_shutdown(struct socket *sock, int mode)
3193 {
3194 	struct sock *sk = sock->sk;
3195 	struct sock *other;
3196 
3197 	if (mode < SHUT_RD || mode > SHUT_RDWR)
3198 		return -EINVAL;
3199 	/* This maps:
3200 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
3201 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
3202 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3203 	 */
3204 	++mode;
3205 
3206 	unix_state_lock(sk);
3207 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
3208 	other = unix_peer(sk);
3209 	if (other)
3210 		sock_hold(other);
3211 	unix_state_unlock(sk);
3212 	sk->sk_state_change(sk);
3213 
3214 	if (other &&
3215 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3216 
3217 		int peer_mode = 0;
3218 		const struct proto *prot = READ_ONCE(other->sk_prot);
3219 
3220 		if (prot->unhash)
3221 			prot->unhash(other);
3222 		if (mode&RCV_SHUTDOWN)
3223 			peer_mode |= SEND_SHUTDOWN;
3224 		if (mode&SEND_SHUTDOWN)
3225 			peer_mode |= RCV_SHUTDOWN;
3226 		unix_state_lock(other);
3227 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
3228 		unix_state_unlock(other);
3229 		other->sk_state_change(other);
3230 		if (peer_mode == SHUTDOWN_MASK)
3231 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3232 		else if (peer_mode & RCV_SHUTDOWN)
3233 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3234 	}
3235 	if (other)
3236 		sock_put(other);
3237 
3238 	return 0;
3239 }
3240 
3241 long unix_inq_len(struct sock *sk)
3242 {
3243 	struct sk_buff *skb;
3244 	long amount = 0;
3245 
3246 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
3247 		return -EINVAL;
3248 
3249 	if (sk->sk_type == SOCK_STREAM)
3250 		return READ_ONCE(unix_sk(sk)->inq_len);
3251 
3252 	spin_lock(&sk->sk_receive_queue.lock);
3253 	if (sk->sk_type == SOCK_SEQPACKET) {
3254 		skb_queue_walk(&sk->sk_receive_queue, skb)
3255 			amount += unix_skb_len(skb);
3256 	} else {
3257 		skb = skb_peek(&sk->sk_receive_queue);
3258 		if (skb)
3259 			amount = skb->len;
3260 	}
3261 	spin_unlock(&sk->sk_receive_queue.lock);
3262 
3263 	return amount;
3264 }
3265 EXPORT_SYMBOL_GPL(unix_inq_len);
3266 
/* Outgoing queue size of @sk, as reported by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
3272 
3273 static int unix_open_file(struct sock *sk)
3274 {
3275 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3276 		return -EPERM;
3277 
3278 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3279 		return -ENOENT;
3280 
3281 	if (!unix_sk(sk)->path.dentry)
3282 		return -ENOENT;
3283 
3284 	return FD_ADD(O_CLOEXEC, dentry_open(&unix_sk(sk)->path, O_PATH, current_cred()));
3285 }
3286 
3287 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3288 {
3289 	struct sock *sk = sock->sk;
3290 	long amount = 0;
3291 	int err;
3292 
3293 	switch (cmd) {
3294 	case SIOCOUTQ:
3295 		amount = unix_outq_len(sk);
3296 		err = put_user(amount, (int __user *)arg);
3297 		break;
3298 	case SIOCINQ:
3299 		amount = unix_inq_len(sk);
3300 		if (amount < 0)
3301 			err = amount;
3302 		else
3303 			err = put_user(amount, (int __user *)arg);
3304 		break;
3305 	case SIOCUNIXFILE:
3306 		err = unix_open_file(sk);
3307 		break;
3308 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3309 	case SIOCATMARK:
3310 		{
3311 			struct unix_sock *u = unix_sk(sk);
3312 			struct sk_buff *skb;
3313 			int answ = 0;
3314 
3315 			if (sk->sk_type != SOCK_STREAM)
3316 				return -EOPNOTSUPP;
3317 
3318 			mutex_lock(&u->iolock);
3319 
3320 			skb = skb_peek(&sk->sk_receive_queue);
3321 			if (skb) {
3322 				struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
3323 				struct sk_buff *next_skb;
3324 
3325 				next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
3326 
3327 				if (skb == oob_skb ||
3328 				    (!unix_skb_len(skb) &&
3329 				     (!oob_skb || next_skb == oob_skb)))
3330 					answ = 1;
3331 			}
3332 
3333 			mutex_unlock(&u->iolock);
3334 
3335 			err = put_user(answ, (int __user *)arg);
3336 		}
3337 		break;
3338 #endif
3339 	default:
3340 		err = -ENOIOCTLCMD;
3341 		break;
3342 	}
3343 	return err;
3344 }
3345 
3346 #ifdef CONFIG_COMPAT
/* Compat ioctl(): convert @arg with compat_ptr() and reuse unix_ioctl(). */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
3351 #endif
3352 
3353 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3354 {
3355 	struct sock *sk = sock->sk;
3356 	unsigned char state;
3357 	__poll_t mask;
3358 	u8 shutdown;
3359 
3360 	sock_poll_wait(file, sock, wait);
3361 	mask = 0;
3362 	shutdown = READ_ONCE(sk->sk_shutdown);
3363 	state = READ_ONCE(sk->sk_state);
3364 
3365 	/* exceptional events? */
3366 	if (READ_ONCE(sk->sk_err))
3367 		mask |= EPOLLERR;
3368 	if (shutdown == SHUTDOWN_MASK)
3369 		mask |= EPOLLHUP;
3370 	if (shutdown & RCV_SHUTDOWN)
3371 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3372 
3373 	/* readable? */
3374 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3375 		mask |= EPOLLIN | EPOLLRDNORM;
3376 	if (sk_is_readable(sk))
3377 		mask |= EPOLLIN | EPOLLRDNORM;
3378 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3379 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3380 		mask |= EPOLLPRI;
3381 #endif
3382 
3383 	/* Connection-based need to check for termination and startup */
3384 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3385 	    state == TCP_CLOSE)
3386 		mask |= EPOLLHUP;
3387 
3388 	/*
3389 	 * we set writable also when the other side has shut down the
3390 	 * connection. This prevents stuck sockets.
3391 	 */
3392 	if (unix_writable(sk, state))
3393 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3394 
3395 	return mask;
3396 }
3397 
3398 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3399 				    poll_table *wait)
3400 {
3401 	struct sock *sk = sock->sk, *other;
3402 	unsigned int writable;
3403 	unsigned char state;
3404 	__poll_t mask;
3405 	u8 shutdown;
3406 
3407 	sock_poll_wait(file, sock, wait);
3408 	mask = 0;
3409 	shutdown = READ_ONCE(sk->sk_shutdown);
3410 	state = READ_ONCE(sk->sk_state);
3411 
3412 	/* exceptional events? */
3413 	if (READ_ONCE(sk->sk_err) ||
3414 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3415 		mask |= EPOLLERR |
3416 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3417 
3418 	if (shutdown & RCV_SHUTDOWN)
3419 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3420 	if (shutdown == SHUTDOWN_MASK)
3421 		mask |= EPOLLHUP;
3422 
3423 	/* readable? */
3424 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3425 		mask |= EPOLLIN | EPOLLRDNORM;
3426 	if (sk_is_readable(sk))
3427 		mask |= EPOLLIN | EPOLLRDNORM;
3428 
3429 	/* Connection-based need to check for termination and startup */
3430 	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3431 		mask |= EPOLLHUP;
3432 
3433 	/* No write status requested, avoid expensive OUT tests. */
3434 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3435 		return mask;
3436 
3437 	writable = unix_writable(sk, state);
3438 	if (writable) {
3439 		unix_state_lock(sk);
3440 
3441 		other = unix_peer(sk);
3442 		if (other && unix_peer(other) != sk &&
3443 		    unix_recvq_full_lockless(other) &&
3444 		    unix_dgram_peer_wake_me(sk, other))
3445 			writable = 0;
3446 
3447 		unix_state_unlock(sk);
3448 	}
3449 
3450 	if (writable)
3451 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3452 	else
3453 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3454 
3455 	return mask;
3456 }
3457 
3458 #ifdef CONFIG_PROC_FS
3459 
/*
 * The seq_file iterators below pack a hash bucket index and an offset
 * within that bucket into a single position value: the offset occupies
 * the low BUCKET_SPACE bits and the bucket index the bits above them.
 * Offsets are 1-based; unix_from_bucket() matches the offset against a
 * counter that is incremented before the comparison.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Bucket index stored in x: everything above the low BUCKET_SPACE bits. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* In-bucket offset stored in x: the low BUCKET_SPACE bits. */
#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
/* Build a position value from bucket index b and in-bucket offset o. */
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3465 
3466 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3467 {
3468 	unsigned long offset = get_offset(*pos);
3469 	unsigned long bucket = get_bucket(*pos);
3470 	unsigned long count = 0;
3471 	struct sock *sk;
3472 
3473 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3474 	     sk; sk = sk_next(sk)) {
3475 		if (++count == offset)
3476 			break;
3477 	}
3478 
3479 	return sk;
3480 }
3481 
3482 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3483 {
3484 	unsigned long bucket = get_bucket(*pos);
3485 	struct net *net = seq_file_net(seq);
3486 	struct sock *sk;
3487 
3488 	while (bucket < UNIX_HASH_SIZE) {
3489 		spin_lock(&net->unx.table.locks[bucket]);
3490 
3491 		sk = unix_from_bucket(seq, pos);
3492 		if (sk)
3493 			return sk;
3494 
3495 		spin_unlock(&net->unx.table.locks[bucket]);
3496 
3497 		*pos = set_bucket_offset(++bucket, 1);
3498 	}
3499 
3500 	return NULL;
3501 }
3502 
3503 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3504 				  loff_t *pos)
3505 {
3506 	unsigned long bucket = get_bucket(*pos);
3507 
3508 	sk = sk_next(sk);
3509 	if (sk)
3510 		return sk;
3511 
3512 
3513 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3514 
3515 	*pos = set_bucket_offset(++bucket, 1);
3516 
3517 	return unix_get_first(seq, pos);
3518 }
3519 
3520 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3521 {
3522 	if (!*pos)
3523 		return SEQ_START_TOKEN;
3524 
3525 	return unix_get_first(seq, pos);
3526 }
3527 
3528 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3529 {
3530 	++*pos;
3531 
3532 	if (v == SEQ_START_TOKEN)
3533 		return unix_get_first(seq, pos);
3534 
3535 	return unix_get_next(seq, v, pos);
3536 }
3537 
3538 static void unix_seq_stop(struct seq_file *seq, void *v)
3539 {
3540 	struct sock *sk = v;
3541 
3542 	if (sk)
3543 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3544 }
3545 
3546 static int unix_seq_show(struct seq_file *seq, void *v)
3547 {
3548 
3549 	if (v == SEQ_START_TOKEN)
3550 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3551 			 "Inode Path\n");
3552 	else {
3553 		struct sock *s = v;
3554 		struct unix_sock *u = unix_sk(s);
3555 		unix_state_lock(s);
3556 
3557 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5llu",
3558 			s,
3559 			refcount_read(&s->sk_refcnt),
3560 			0,
3561 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3562 			s->sk_type,
3563 			s->sk_socket ?
3564 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3565 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3566 			sock_i_ino(s));
3567 
3568 		if (u->addr) {	// under a hash table lock here
3569 			int i, len;
3570 			seq_putc(seq, ' ');
3571 
3572 			i = 0;
3573 			len = u->addr->len -
3574 				offsetof(struct sockaddr_un, sun_path);
3575 			if (u->addr->name->sun_path[0]) {
3576 				len--;
3577 			} else {
3578 				seq_putc(seq, '@');
3579 				i++;
3580 			}
3581 			for ( ; i < len; i++)
3582 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3583 					 '@');
3584 		}
3585 		unix_state_unlock(s);
3586 		seq_putc(seq, '\n');
3587 	}
3588 
3589 	return 0;
3590 }
3591 
/* seq_file callbacks backing the "unix" proc entry created in unix_net_init(). */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
3598 
3599 #ifdef CONFIG_BPF_SYSCALL
/* Private seq_file state of the BPF unix socket iterator. */
struct bpf_unix_iter_state {
	/* Per-netns seq_file private data; first member of the state. */
	struct seq_net_private p;
	/* Index into batch[] of the socket currently being iterated. */
	unsigned int cur_sk;
	/* Number of sockets stored in batch[]. */
	unsigned int end_sk;
	/* Capacity of batch[], in entries. */
	unsigned int max_sk;
	/* Sockets collected from one hash bucket, each held via sock_hold(). */
	struct sock **batch;
	/* Set when a batch managed to capture every socket of its bucket. */
	bool st_bucket_done;
};
3608 
/* Context filled in by unix_prog_seq_show() and passed to the BPF program. */
struct bpf_iter__unix {
	/* Iterator metadata supplied by the seq_file callbacks. */
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	/* Socket being visited; NULL when invoked from the stop() callback. */
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	/* Socket uid translated via seq_user_ns(); 0 on the stop() call. */
	uid_t uid __aligned(8);
};
3614 
3615 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3616 			      struct unix_sock *unix_sk, uid_t uid)
3617 {
3618 	struct bpf_iter__unix ctx;
3619 
3620 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3621 	ctx.meta = meta;
3622 	ctx.unix_sk = unix_sk;
3623 	ctx.uid = uid;
3624 	return bpf_iter_run_prog(prog, &ctx);
3625 }
3626 
3627 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3628 
3629 {
3630 	struct bpf_unix_iter_state *iter = seq->private;
3631 	unsigned int expected = 1;
3632 	struct sock *sk;
3633 
3634 	sock_hold(start_sk);
3635 	iter->batch[iter->end_sk++] = start_sk;
3636 
3637 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3638 		if (iter->end_sk < iter->max_sk) {
3639 			sock_hold(sk);
3640 			iter->batch[iter->end_sk++] = sk;
3641 		}
3642 
3643 		expected++;
3644 	}
3645 
3646 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3647 
3648 	return expected;
3649 }
3650 
3651 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3652 {
3653 	while (iter->cur_sk < iter->end_sk)
3654 		sock_put(iter->batch[iter->cur_sk++]);
3655 }
3656 
3657 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3658 				       unsigned int new_batch_sz)
3659 {
3660 	struct sock **new_batch;
3661 
3662 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3663 			     GFP_USER | __GFP_NOWARN);
3664 	if (!new_batch)
3665 		return -ENOMEM;
3666 
3667 	bpf_iter_unix_put_batch(iter);
3668 	kvfree(iter->batch);
3669 	iter->batch = new_batch;
3670 	iter->max_sk = new_batch_sz;
3671 
3672 	return 0;
3673 }
3674 
3675 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3676 					loff_t *pos)
3677 {
3678 	struct bpf_unix_iter_state *iter = seq->private;
3679 	unsigned int expected;
3680 	bool resized = false;
3681 	struct sock *sk;
3682 
3683 	if (iter->st_bucket_done)
3684 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3685 
3686 again:
3687 	/* Get a new batch */
3688 	iter->cur_sk = 0;
3689 	iter->end_sk = 0;
3690 
3691 	sk = unix_get_first(seq, pos);
3692 	if (!sk)
3693 		return NULL; /* Done */
3694 
3695 	expected = bpf_iter_unix_hold_batch(seq, sk);
3696 
3697 	if (iter->end_sk == expected) {
3698 		iter->st_bucket_done = true;
3699 		return sk;
3700 	}
3701 
3702 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3703 		resized = true;
3704 		goto again;
3705 	}
3706 
3707 	return sk;
3708 }
3709 
3710 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3711 {
3712 	if (!*pos)
3713 		return SEQ_START_TOKEN;
3714 
3715 	/* bpf iter does not support lseek, so it always
3716 	 * continue from where it was stop()-ped.
3717 	 */
3718 	return bpf_iter_unix_batch(seq, pos);
3719 }
3720 
3721 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3722 {
3723 	struct bpf_unix_iter_state *iter = seq->private;
3724 	struct sock *sk;
3725 
3726 	/* Whenever seq_next() is called, the iter->cur_sk is
3727 	 * done with seq_show(), so advance to the next sk in
3728 	 * the batch.
3729 	 */
3730 	if (iter->cur_sk < iter->end_sk)
3731 		sock_put(iter->batch[iter->cur_sk++]);
3732 
3733 	++*pos;
3734 
3735 	if (iter->cur_sk < iter->end_sk)
3736 		sk = iter->batch[iter->cur_sk];
3737 	else
3738 		sk = bpf_iter_unix_batch(seq, pos);
3739 
3740 	return sk;
3741 }
3742 
3743 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3744 {
3745 	struct bpf_iter_meta meta;
3746 	struct bpf_prog *prog;
3747 	struct sock *sk = v;
3748 	uid_t uid;
3749 	int ret;
3750 
3751 	if (v == SEQ_START_TOKEN)
3752 		return 0;
3753 
3754 	lock_sock(sk);
3755 	unix_state_lock(sk);
3756 
3757 	if (unlikely(sock_flag(sk, SOCK_DEAD))) {
3758 		ret = SEQ_SKIP;
3759 		goto unlock;
3760 	}
3761 
3762 	uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
3763 	meta.seq = seq;
3764 	prog = bpf_iter_get_info(&meta, false);
3765 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3766 unlock:
3767 	unix_state_unlock(sk);
3768 	release_sock(sk);
3769 	return ret;
3770 }
3771 
3772 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3773 {
3774 	struct bpf_unix_iter_state *iter = seq->private;
3775 	struct bpf_iter_meta meta;
3776 	struct bpf_prog *prog;
3777 
3778 	if (!v) {
3779 		meta.seq = seq;
3780 		prog = bpf_iter_get_info(&meta, true);
3781 		if (prog)
3782 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3783 	}
3784 
3785 	if (iter->cur_sk < iter->end_sk)
3786 		bpf_iter_unix_put_batch(iter);
3787 }
3788 
/* seq_file callbacks of the BPF unix iterator; referenced from unix_seq_info. */
static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
3795 #endif
3796 #endif
3797 
/*
 * Protocol family descriptor for PF_UNIX, registered with sock_register()
 * in af_unix_init(); sockets of this family are created by unix_create().
 */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
3803 
3804 
/*
 * Per-network-namespace setup: set the default datagram queue length,
 * register the sysctl table, create the "unix" proc entry (when
 * CONFIG_PROC_FS is set) and allocate and initialise the socket hash
 * table (one lock and one list head per bucket).
 *
 * Returns 0 on success. Every failure undoes the steps already taken and
 * is reported as -ENOMEM, whichever step failed.
 */
static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_objs(spinlock_t, UNIX_HASH_SIZE);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_objs(struct hlist_head,
					       UNIX_HASH_SIZE);
	if (!net->unx.table.buckets)
		goto free_locks;

	/* Initialise every bucket: its lock, the lock's compare function, its list. */
	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

	/* Error unwinding, in reverse order of the setup steps above. */
free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
	/* err_sysctl is only jumped to from the CONFIG_PROC_FS block above. */
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}
3847 
/*
 * Per-network-namespace teardown: free the hash table arrays allocated
 * by unix_net_init(), unregister the sysctl table and remove the "unix"
 * proc entry.
 */
static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	/* Called unconditionally, unlike the CONFIG_PROC_FS-guarded creation. */
	remove_proc_entry("unix", net->proc_net);
}
3855 
/* Per-netns init/exit hooks, registered in af_unix_init(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
3860 
3861 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/*
 * Signature of the "unix" BPF iterator; the arguments mirror the members
 * of struct bpf_iter__unix.
 */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

/* Initial capacity of the socket batch allocated in bpf_iter_init_unix(). */
#define INIT_BATCH_SZ 16
3866 
3867 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3868 {
3869 	struct bpf_unix_iter_state *iter = priv_data;
3870 	int err;
3871 
3872 	err = bpf_iter_init_seq_net(priv_data, aux);
3873 	if (err)
3874 		return err;
3875 
3876 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3877 	if (err) {
3878 		bpf_iter_fini_seq_net(priv_data);
3879 		return err;
3880 	}
3881 
3882 	return 0;
3883 }
3884 
3885 static void bpf_iter_fini_unix(void *priv_data)
3886 {
3887 	struct bpf_unix_iter_state *iter = priv_data;
3888 
3889 	bpf_iter_fini_seq_net(priv_data);
3890 	kvfree(iter->batch);
3891 }
3892 
/*
 * seq_file description of the BPF unix iterator: its callbacks, the
 * init/fini hooks for the private state and the size of that state.
 */
static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};
3899 
3900 static const struct bpf_func_proto *
3901 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3902 			     const struct bpf_prog *prog)
3903 {
3904 	switch (func_id) {
3905 	case BPF_FUNC_setsockopt:
3906 		return &bpf_sk_setsockopt_proto;
3907 	case BPF_FUNC_getsockopt:
3908 		return &bpf_sk_getsockopt_proto;
3909 	default:
3910 		return NULL;
3911 	}
3912 }
3913 
/*
 * Registration record of the "unix" BPF iterator target. It describes
 * one context argument, the unix_sk member of struct bpf_iter__unix,
 * which may be NULL; that entry's btf_id is filled in at run time by
 * bpf_iter_register().
 */
static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto         = bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};
3924 
3925 static void __init bpf_iter_register(void)
3926 {
3927 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3928 	if (bpf_iter_reg_target(&unix_reg_info))
3929 		pr_warn("Warning: could not register bpf iterator unix\n");
3930 }
3931 #endif
3932 
3933 static int __init af_unix_init(void)
3934 {
3935 	int i, rc = -1;
3936 
3937 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3938 
3939 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3940 		spin_lock_init(&bsd_socket_locks[i]);
3941 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3942 	}
3943 
3944 	rc = proto_register(&unix_dgram_proto, 1);
3945 	if (rc != 0) {
3946 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3947 		goto out;
3948 	}
3949 
3950 	rc = proto_register(&unix_stream_proto, 1);
3951 	if (rc != 0) {
3952 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3953 		proto_unregister(&unix_dgram_proto);
3954 		goto out;
3955 	}
3956 
3957 	sock_register(&unix_family_ops);
3958 	register_pernet_subsys(&unix_net_ops);
3959 	unix_bpf_build_proto();
3960 
3961 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3962 	bpf_iter_register();
3963 #endif
3964 
3965 out:
3966 	return rc;
3967 }
3968 
3969 /* Later than subsys_initcall() because we depend on stuff initialised there */
3970 fs_initcall(af_unix_init);
3971