xref: /linux/net/unix/af_unix.c (revision 4b4e41f35cc700e328e693f0c18b24424ab47ff0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko EiBfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge number
34  *					of hashed socks (for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 
117 #include "scm.h"
118 
/*
 * Hash table holding every AF_UNIX socket. It has 2 * UNIX_HASH_SIZE buckets:
 * unix_sockets_unbound() hands out buckets at index UNIX_HASH_SIZE and above,
 * while inode lookups index with i_ino & (UNIX_HASH_SIZE - 1).
 */
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock held around inserts, removals and lookups on unix_socket_table. */
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
/* Number of AF_UNIX socks: raised in unix_create1(), dropped in unix_sock_destructor(). */
123 static atomic_long_t unix_nr_socks;
124 
125 
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128 	unsigned long hash = (unsigned long)addr;
129 
130 	hash ^= hash >> 16;
131 	hash ^= hash >> 8;
132 	hash %= UNIX_HASH_SIZE;
133 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135 
/*
 * True when the socket's bound address hash is below UNIX_HASH_SIZE.
 * unix_bind() stores UNIX_HASH_SIZE as the hash of filesystem-path names,
 * so those never match. Dereferences ->addr: only use on bound sockets.
 */
136 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137 
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 {
141 	UNIXCB(skb).secid = scm->secid;
142 }
143 
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 {
146 	scm->secid = UNIXCB(skb).secid;
147 }
148 
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150 {
151 	return (scm->secid == UNIXCB(skb).secid);
152 }
153 #else
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 { }
156 
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 { }
159 
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161 {
162 	return true;
163 }
164 #endif /* CONFIG_SECURITY_NETWORK */
165 
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171 
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174 	unsigned int hash = (__force unsigned int)csum_fold(n);
175 
176 	hash ^= hash>>8;
177 	return hash&(UNIX_HASH_SIZE-1);
178 }
179 
/* Peer pointer of a unix socket; expands to an lvalue, so it is also assigned through. */
180 #define unix_peer(sk) (unix_sk(sk)->peer)
181 
/* Non-zero when @osk's peer pointer refers to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_other = unix_peer(osk);

	return peer_of_other == sk;
}
186 
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 {
189 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 }
191 
192 static inline int unix_recvq_full(struct sock const *sk)
193 {
194 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196 
197 struct sock *unix_peer_get(struct sock *s)
198 {
199 	struct sock *peer;
200 
201 	unix_state_lock(s);
202 	peer = unix_peer(s);
203 	if (peer)
204 		sock_hold(peer);
205 	unix_state_unlock(s);
206 	return peer;
207 }
208 EXPORT_SYMBOL_GPL(unix_peer_get);
209 
210 static inline void unix_release_addr(struct unix_address *addr)
211 {
212 	if (refcount_dec_and_test(&addr->refcnt))
213 		kfree(addr);
214 }
215 
216 /*
217  *	Check unix socket name:
218  *		- should be not zero length.
219  *	        - if started by not zero, should be NULL terminated (FS object)
220  *		- if started by zero, it is abstract name.
221  */
222 
223 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
224 {
225 	*hashp = 0;
226 
227 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
228 		return -EINVAL;
229 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230 		return -EINVAL;
231 	if (sunaddr->sun_path[0]) {
232 		/*
233 		 * This may look like an off by one error but it is a bit more
234 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235 		 * sun_path[108] doesn't as such exist.  However in kernel space
236 		 * we are guaranteed that it is a valid memory location in our
237 		 * kernel address buffer.
238 		 */
239 		((char *)sunaddr)[len] = 0;
240 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
241 		return len;
242 	}
243 
244 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
245 	return len;
246 }
247 
/*
 * Unlink @sk from whichever hash chain it is on. The locked wrapper
 * unix_remove_socket() calls this with unix_table_lock held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
252 
/*
 * Link @sk onto the chain @list. Warns if @sk is still hashed somewhere.
 * The locked wrapper unix_insert_socket() calls this with unix_table_lock
 * held.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
258 
259 static inline void unix_remove_socket(struct sock *sk)
260 {
261 	spin_lock(&unix_table_lock);
262 	__unix_remove_socket(sk);
263 	spin_unlock(&unix_table_lock);
264 }
265 
266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267 {
268 	spin_lock(&unix_table_lock);
269 	__unix_insert_socket(list, sk);
270 	spin_unlock(&unix_table_lock);
271 }
272 
273 static struct sock *__unix_find_socket_byname(struct net *net,
274 					      struct sockaddr_un *sunname,
275 					      int len, int type, unsigned int hash)
276 {
277 	struct sock *s;
278 
279 	sk_for_each(s, &unix_socket_table[hash ^ type]) {
280 		struct unix_sock *u = unix_sk(s);
281 
282 		if (!net_eq(sock_net(s), net))
283 			continue;
284 
285 		if (u->addr->len == len &&
286 		    !memcmp(u->addr->name, sunname, len))
287 			return s;
288 	}
289 	return NULL;
290 }
291 
292 static inline struct sock *unix_find_socket_byname(struct net *net,
293 						   struct sockaddr_un *sunname,
294 						   int len, int type,
295 						   unsigned int hash)
296 {
297 	struct sock *s;
298 
299 	spin_lock(&unix_table_lock);
300 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
301 	if (s)
302 		sock_hold(s);
303 	spin_unlock(&unix_table_lock);
304 	return s;
305 }
306 
307 static struct sock *unix_find_socket_byinode(struct inode *i)
308 {
309 	struct sock *s;
310 
311 	spin_lock(&unix_table_lock);
312 	sk_for_each(s,
313 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
314 		struct dentry *dentry = unix_sk(s)->path.dentry;
315 
316 		if (dentry && d_backing_inode(dentry) == i) {
317 			sock_hold(s);
318 			goto found;
319 		}
320 	}
321 	s = NULL;
322 found:
323 	spin_unlock(&unix_table_lock);
324 	return s;
325 }
326 
327 /* Support code for asymmetrically connected dgram sockets
328  *
329  * If a datagram socket is connected to a socket not itself connected
330  * to the first socket (eg, /dev/log), clients may only enqueue more
331  * messages if the present receive queue of the server socket is not
332  * "too large". This means there's a second writeability condition
333  * poll and sendmsg need to test. The dgram recv code will do a wake
334  * up on the peer_wait wait queue of a socket upon reception of a
335  * datagram which needs to be propagated to sleeping would-be writers
336  * since these might not have sent anything so far. This can't be
337  * accomplished via poll_wait because the lifetime of the server
338  * socket might be less than that of its clients if these break their
339  * association with it or if the server socket is closed while clients
340  * are still connected to it and there's no way to inform "a polling
341  * implementation" that it should let go of a certain wait queue.
342  *
343  * In order to propagate a wake up, a wait_queue_entry_t of the client
344  * socket is enqueued on the peer_wait queue of the server socket
345  * whose wake function does a wake_up on the ordinary client socket
346  * wait queue. This connection is established whenever a write (or
347  * poll for write) hit the flow control condition and broken when the
348  * association to the server socket is dissolved or after a wake up
349  * was relayed.
350  */
351 
352 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
353 				      void *key)
354 {
355 	struct unix_sock *u;
356 	wait_queue_head_t *u_sleep;
357 
358 	u = container_of(q, struct unix_sock, peer_wake);
359 
360 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
361 			    q);
362 	u->peer_wake.private = NULL;
363 
364 	/* relaying can only happen while the wq still exists */
365 	u_sleep = sk_sleep(&u->sk);
366 	if (u_sleep)
367 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
368 
369 	return 0;
370 }
371 
372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
373 {
374 	struct unix_sock *u, *u_other;
375 	int rc;
376 
377 	u = unix_sk(sk);
378 	u_other = unix_sk(other);
379 	rc = 0;
380 	spin_lock(&u_other->peer_wait.lock);
381 
382 	if (!u->peer_wake.private) {
383 		u->peer_wake.private = other;
384 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
385 
386 		rc = 1;
387 	}
388 
389 	spin_unlock(&u_other->peer_wait.lock);
390 	return rc;
391 }
392 
393 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
394 					    struct sock *other)
395 {
396 	struct unix_sock *u, *u_other;
397 
398 	u = unix_sk(sk);
399 	u_other = unix_sk(other);
400 	spin_lock(&u_other->peer_wait.lock);
401 
402 	if (u->peer_wake.private == other) {
403 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
404 		u->peer_wake.private = NULL;
405 	}
406 
407 	spin_unlock(&u_other->peer_wait.lock);
408 }
409 
410 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
411 						   struct sock *other)
412 {
413 	unix_dgram_peer_wake_disconnect(sk, other);
414 	wake_up_interruptible_poll(sk_sleep(sk),
415 				   EPOLLOUT |
416 				   EPOLLWRNORM |
417 				   EPOLLWRBAND);
418 }
419 
420 /* preconditions:
421  *	- unix_peer(sk) == other
422  *	- association is stable
423  */
424 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
425 {
426 	int connected;
427 
428 	connected = unix_dgram_peer_wake_connect(sk, other);
429 
430 	/* If other is SOCK_DEAD, we want to make sure we signal
431 	 * POLLOUT, such that a subsequent write() can get a
432 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
433 	 * to other and its full, we will hang waiting for POLLOUT.
434 	 */
435 	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
436 		return 1;
437 
438 	if (connected)
439 		unix_dgram_peer_wake_disconnect(sk, other);
440 
441 	return 0;
442 }
443 
444 static int unix_writable(const struct sock *sk)
445 {
446 	return sk->sk_state != TCP_LISTEN &&
447 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
448 }
449 
450 static void unix_write_space(struct sock *sk)
451 {
452 	struct socket_wq *wq;
453 
454 	rcu_read_lock();
455 	if (unix_writable(sk)) {
456 		wq = rcu_dereference(sk->sk_wq);
457 		if (skwq_has_sleeper(wq))
458 			wake_up_interruptible_sync_poll(&wq->wait,
459 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
460 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
461 	}
462 	rcu_read_unlock();
463 }
464 
465 /* When dgram socket disconnects (or changes its peer), we clear its receive
466  * queue of packets arrived from previous peer. First, it allows to do
467  * flow control based only on wmem_alloc; second, sk connected to peer
468  * may receive messages only from that peer. */
469 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
470 {
471 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
472 		skb_queue_purge(&sk->sk_receive_queue);
473 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
474 
475 		/* If one link of bidirectional dgram pipe is disconnected,
476 		 * we signal error. Messages are lost. Do not make this,
477 		 * when peer was not connected to us.
478 		 */
479 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
480 			other->sk_err = ECONNRESET;
481 			other->sk_error_report(other);
482 		}
483 	}
484 }
485 
486 static void unix_sock_destructor(struct sock *sk)
487 {
488 	struct unix_sock *u = unix_sk(sk);
489 
490 	skb_queue_purge(&sk->sk_receive_queue);
491 
492 	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
493 	WARN_ON(!sk_unhashed(sk));
494 	WARN_ON(sk->sk_socket);
495 	if (!sock_flag(sk, SOCK_DEAD)) {
496 		pr_info("Attempt to release alive unix socket: %p\n", sk);
497 		return;
498 	}
499 
500 	if (u->addr)
501 		unix_release_addr(u->addr);
502 
503 	atomic_long_dec(&unix_nr_socks);
504 	local_bh_disable();
505 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
506 	local_bh_enable();
507 #ifdef UNIX_REFCNT_DEBUG
508 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
509 		atomic_long_read(&unix_nr_socks));
510 #endif
511 }
512 
513 static void unix_release_sock(struct sock *sk, int embrion)
514 {
515 	struct unix_sock *u = unix_sk(sk);
516 	struct path path;
517 	struct sock *skpair;
518 	struct sk_buff *skb;
519 	int state;
520 
521 	unix_remove_socket(sk);
522 
523 	/* Clear state */
524 	unix_state_lock(sk);
525 	sock_orphan(sk);
526 	sk->sk_shutdown = SHUTDOWN_MASK;
527 	path	     = u->path;
528 	u->path.dentry = NULL;
529 	u->path.mnt = NULL;
530 	state = sk->sk_state;
531 	sk->sk_state = TCP_CLOSE;
532 	unix_state_unlock(sk);
533 
534 	wake_up_interruptible_all(&u->peer_wait);
535 
536 	skpair = unix_peer(sk);
537 
538 	if (skpair != NULL) {
539 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
540 			unix_state_lock(skpair);
541 			/* No more writes */
542 			skpair->sk_shutdown = SHUTDOWN_MASK;
543 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
544 				skpair->sk_err = ECONNRESET;
545 			unix_state_unlock(skpair);
546 			skpair->sk_state_change(skpair);
547 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
548 		}
549 
550 		unix_dgram_peer_wake_disconnect(sk, skpair);
551 		sock_put(skpair); /* It may now die */
552 		unix_peer(sk) = NULL;
553 	}
554 
555 	/* Try to flush out this socket. Throw out buffers at least */
556 
557 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
558 		if (state == TCP_LISTEN)
559 			unix_release_sock(skb->sk, 1);
560 		/* passed fds are erased in the kfree_skb hook	      */
561 		UNIXCB(skb).consumed = skb->len;
562 		kfree_skb(skb);
563 	}
564 
565 	if (path.dentry)
566 		path_put(&path);
567 
568 	sock_put(sk);
569 
570 	/* ---- Socket is dead now and most probably destroyed ---- */
571 
572 	/*
573 	 * Fixme: BSD difference: In BSD all sockets connected to us get
574 	 *	  ECONNRESET and we die on the spot. In Linux we behave
575 	 *	  like files and pipes do and wait for the last
576 	 *	  dereference.
577 	 *
578 	 * Can't we simply set sock->err?
579 	 *
580 	 *	  What the above comment does talk about? --ANK(980817)
581 	 */
582 
583 	if (unix_tot_inflight)
584 		unix_gc();		/* Garbage collect fds */
585 }
586 
587 static void init_peercred(struct sock *sk)
588 {
589 	put_pid(sk->sk_peer_pid);
590 	if (sk->sk_peer_cred)
591 		put_cred(sk->sk_peer_cred);
592 	sk->sk_peer_pid  = get_pid(task_tgid(current));
593 	sk->sk_peer_cred = get_current_cred();
594 }
595 
596 static void copy_peercred(struct sock *sk, struct sock *peersk)
597 {
598 	put_pid(sk->sk_peer_pid);
599 	if (sk->sk_peer_cred)
600 		put_cred(sk->sk_peer_cred);
601 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
602 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
603 }
604 
605 static int unix_listen(struct socket *sock, int backlog)
606 {
607 	int err;
608 	struct sock *sk = sock->sk;
609 	struct unix_sock *u = unix_sk(sk);
610 	struct pid *old_pid = NULL;
611 
612 	err = -EOPNOTSUPP;
613 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
614 		goto out;	/* Only stream/seqpacket sockets accept */
615 	err = -EINVAL;
616 	if (!u->addr)
617 		goto out;	/* No listens on an unbound socket */
618 	unix_state_lock(sk);
619 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
620 		goto out_unlock;
621 	if (backlog > sk->sk_max_ack_backlog)
622 		wake_up_interruptible_all(&u->peer_wait);
623 	sk->sk_max_ack_backlog	= backlog;
624 	sk->sk_state		= TCP_LISTEN;
625 	/* set credentials so connect can copy them */
626 	init_peercred(sk);
627 	err = 0;
628 
629 out_unlock:
630 	unix_state_unlock(sk);
631 	put_pid(old_pid);
632 out:
633 	return err;
634 }
635 
636 static int unix_release(struct socket *);
637 static int unix_bind(struct socket *, struct sockaddr *, int);
638 static int unix_stream_connect(struct socket *, struct sockaddr *,
639 			       int addr_len, int flags);
640 static int unix_socketpair(struct socket *, struct socket *);
641 static int unix_accept(struct socket *, struct socket *, int, bool);
642 static int unix_getname(struct socket *, struct sockaddr *, int);
643 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
644 static __poll_t unix_dgram_poll(struct file *, struct socket *,
645 				    poll_table *);
646 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
647 #ifdef CONFIG_COMPAT
648 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
649 #endif
650 static int unix_shutdown(struct socket *, int);
651 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
652 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
653 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
654 				    size_t size, int flags);
655 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
656 				       struct pipe_inode_info *, size_t size,
657 				       unsigned int flags);
658 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
659 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
660 static int unix_dgram_connect(struct socket *, struct sockaddr *,
661 			      int, int);
662 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
663 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
664 				  int);
665 
666 static int unix_set_peek_off(struct sock *sk, int val)
667 {
668 	struct unix_sock *u = unix_sk(sk);
669 
670 	if (mutex_lock_interruptible(&u->iolock))
671 		return -EINTR;
672 
673 	sk->sk_peek_off = val;
674 	mutex_unlock(&u->iolock);
675 
676 	return 0;
677 }
678 
679 
680 static const struct proto_ops unix_stream_ops = {
681 	.family =	PF_UNIX,
682 	.owner =	THIS_MODULE,
683 	.release =	unix_release,
684 	.bind =		unix_bind,
685 	.connect =	unix_stream_connect,
686 	.socketpair =	unix_socketpair,
687 	.accept =	unix_accept,
688 	.getname =	unix_getname,
689 	.poll =		unix_poll,
690 	.ioctl =	unix_ioctl,
691 #ifdef CONFIG_COMPAT
692 	.compat_ioctl =	unix_compat_ioctl,
693 #endif
694 	.listen =	unix_listen,
695 	.shutdown =	unix_shutdown,
696 	.setsockopt =	sock_no_setsockopt,
697 	.getsockopt =	sock_no_getsockopt,
698 	.sendmsg =	unix_stream_sendmsg,
699 	.recvmsg =	unix_stream_recvmsg,
700 	.mmap =		sock_no_mmap,
701 	.sendpage =	unix_stream_sendpage,
702 	.splice_read =	unix_stream_splice_read,
703 	.set_peek_off =	unix_set_peek_off,
704 };
705 
706 static const struct proto_ops unix_dgram_ops = {
707 	.family =	PF_UNIX,
708 	.owner =	THIS_MODULE,
709 	.release =	unix_release,
710 	.bind =		unix_bind,
711 	.connect =	unix_dgram_connect,
712 	.socketpair =	unix_socketpair,
713 	.accept =	sock_no_accept,
714 	.getname =	unix_getname,
715 	.poll =		unix_dgram_poll,
716 	.ioctl =	unix_ioctl,
717 #ifdef CONFIG_COMPAT
718 	.compat_ioctl =	unix_compat_ioctl,
719 #endif
720 	.listen =	sock_no_listen,
721 	.shutdown =	unix_shutdown,
722 	.setsockopt =	sock_no_setsockopt,
723 	.getsockopt =	sock_no_getsockopt,
724 	.sendmsg =	unix_dgram_sendmsg,
725 	.recvmsg =	unix_dgram_recvmsg,
726 	.mmap =		sock_no_mmap,
727 	.sendpage =	sock_no_sendpage,
728 	.set_peek_off =	unix_set_peek_off,
729 };
730 
731 static const struct proto_ops unix_seqpacket_ops = {
732 	.family =	PF_UNIX,
733 	.owner =	THIS_MODULE,
734 	.release =	unix_release,
735 	.bind =		unix_bind,
736 	.connect =	unix_stream_connect,
737 	.socketpair =	unix_socketpair,
738 	.accept =	unix_accept,
739 	.getname =	unix_getname,
740 	.poll =		unix_dgram_poll,
741 	.ioctl =	unix_ioctl,
742 #ifdef CONFIG_COMPAT
743 	.compat_ioctl =	unix_compat_ioctl,
744 #endif
745 	.listen =	unix_listen,
746 	.shutdown =	unix_shutdown,
747 	.setsockopt =	sock_no_setsockopt,
748 	.getsockopt =	sock_no_getsockopt,
749 	.sendmsg =	unix_seqpacket_sendmsg,
750 	.recvmsg =	unix_seqpacket_recvmsg,
751 	.mmap =		sock_no_mmap,
752 	.sendpage =	sock_no_sendpage,
753 	.set_peek_off =	unix_set_peek_off,
754 };
755 
756 static struct proto unix_proto = {
757 	.name			= "UNIX",
758 	.owner			= THIS_MODULE,
759 	.obj_size		= sizeof(struct unix_sock),
760 };
761 
762 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
763 {
764 	struct sock *sk = NULL;
765 	struct unix_sock *u;
766 
767 	atomic_long_inc(&unix_nr_socks);
768 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
769 		goto out;
770 
771 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
772 	if (!sk)
773 		goto out;
774 
775 	sock_init_data(sock, sk);
776 
777 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
778 	sk->sk_write_space	= unix_write_space;
779 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
780 	sk->sk_destruct		= unix_sock_destructor;
781 	u	  = unix_sk(sk);
782 	u->path.dentry = NULL;
783 	u->path.mnt = NULL;
784 	spin_lock_init(&u->lock);
785 	atomic_long_set(&u->inflight, 0);
786 	INIT_LIST_HEAD(&u->link);
787 	mutex_init(&u->iolock); /* single task reading lock */
788 	mutex_init(&u->bindlock); /* single task binding lock */
789 	init_waitqueue_head(&u->peer_wait);
790 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
791 	unix_insert_socket(unix_sockets_unbound(sk), sk);
792 out:
793 	if (sk == NULL)
794 		atomic_long_dec(&unix_nr_socks);
795 	else {
796 		local_bh_disable();
797 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
798 		local_bh_enable();
799 	}
800 	return sk;
801 }
802 
803 static int unix_create(struct net *net, struct socket *sock, int protocol,
804 		       int kern)
805 {
806 	if (protocol && protocol != PF_UNIX)
807 		return -EPROTONOSUPPORT;
808 
809 	sock->state = SS_UNCONNECTED;
810 
811 	switch (sock->type) {
812 	case SOCK_STREAM:
813 		sock->ops = &unix_stream_ops;
814 		break;
815 		/*
816 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
817 		 *	nothing uses it.
818 		 */
819 	case SOCK_RAW:
820 		sock->type = SOCK_DGRAM;
821 		/* fall through */
822 	case SOCK_DGRAM:
823 		sock->ops = &unix_dgram_ops;
824 		break;
825 	case SOCK_SEQPACKET:
826 		sock->ops = &unix_seqpacket_ops;
827 		break;
828 	default:
829 		return -ESOCKTNOSUPPORT;
830 	}
831 
832 	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
833 }
834 
835 static int unix_release(struct socket *sock)
836 {
837 	struct sock *sk = sock->sk;
838 
839 	if (!sk)
840 		return 0;
841 
842 	unix_release_sock(sk, 0);
843 	sock->sk = NULL;
844 
845 	return 0;
846 }
847 
848 static int unix_autobind(struct socket *sock)
849 {
850 	struct sock *sk = sock->sk;
851 	struct net *net = sock_net(sk);
852 	struct unix_sock *u = unix_sk(sk);
853 	static u32 ordernum = 1;
854 	struct unix_address *addr;
855 	int err;
856 	unsigned int retries = 0;
857 
858 	err = mutex_lock_interruptible(&u->bindlock);
859 	if (err)
860 		return err;
861 
862 	err = 0;
863 	if (u->addr)
864 		goto out;
865 
866 	err = -ENOMEM;
867 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
868 	if (!addr)
869 		goto out;
870 
871 	addr->name->sun_family = AF_UNIX;
872 	refcount_set(&addr->refcnt, 1);
873 
874 retry:
875 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
876 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
877 
878 	spin_lock(&unix_table_lock);
879 	ordernum = (ordernum+1)&0xFFFFF;
880 
881 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
882 				      addr->hash)) {
883 		spin_unlock(&unix_table_lock);
884 		/*
885 		 * __unix_find_socket_byname() may take long time if many names
886 		 * are already in use.
887 		 */
888 		cond_resched();
889 		/* Give up if all names seems to be in use. */
890 		if (retries++ == 0xFFFFF) {
891 			err = -ENOSPC;
892 			kfree(addr);
893 			goto out;
894 		}
895 		goto retry;
896 	}
897 	addr->hash ^= sk->sk_type;
898 
899 	__unix_remove_socket(sk);
900 	smp_store_release(&u->addr, addr);
901 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
902 	spin_unlock(&unix_table_lock);
903 	err = 0;
904 
905 out:	mutex_unlock(&u->bindlock);
906 	return err;
907 }
908 
/*
 * Resolve a destination address to a socket of the requested type.
 *
 * A name whose first path byte is non-zero is looked up in the
 * filesystem (write permission on the inode is required); any other
 * name is looked up in the abstract name table.
 *
 * Returns the socket with a reference held. On failure returns NULL and
 * stores the error in *error: the kern_path()/inode_permission() error,
 * -ECONNREFUSED when nothing is bound there, or -EPROTOTYPE when the
 * socket found through the filesystem has a different type.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *found;
	struct inode *inode;
	struct path path;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name */
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}
		if (unix_sk(found)->path.dentry)
			touch_atime(&unix_sk(found)->path);
		return found;
	}

	/* Filesystem name */
	err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	inode = d_backing_inode(path.dentry);
	err = inode_permission(inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(inode);
	if (!found)
		goto put_fail;

	/* The access time is only touched for a socket of the right type. */
	if (found->sk_type == type)
		touch_atime(&path);

	path_put(&path);

	if (found->sk_type != type) {
		err = -EPROTOTYPE;
		sock_put(found);
		goto fail;
	}

	return found;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
963 
964 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
965 {
966 	struct dentry *dentry;
967 	struct path path;
968 	int err = 0;
969 	/*
970 	 * Get the parent directory, calculate the hash for last
971 	 * component.
972 	 */
973 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
974 	err = PTR_ERR(dentry);
975 	if (IS_ERR(dentry))
976 		return err;
977 
978 	/*
979 	 * All right, let's create it.
980 	 */
981 	err = security_path_mknod(&path, dentry, mode, 0);
982 	if (!err) {
983 		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
984 		if (!err) {
985 			res->mnt = mntget(path.mnt);
986 			res->dentry = dget(dentry);
987 		}
988 	}
989 	done_path_create(&path, dentry);
990 	return err;
991 }
992 
/*
 * unix_bind - bind an AF_UNIX socket to the address in uaddr.
 *
 * Three kinds of request are handled:
 *  - addr_len == sizeof(short) (family only): delegate to unix_autobind();
 *  - sun_path[0] != 0: create a socket inode with unix_mknod() and hash the
 *    socket by that inode's number;
 *  - sun_path[0] == 0: hash the socket by name, refusing a name that
 *    __unix_find_socket_byname() already finds.
 *
 * Returns 0 on success, or a negative errno: -EINVAL for a too short or
 * non-AF_UNIX address and for a socket that already has an address,
 * -EADDRINUSE for a name or path that is already taken, -ENOMEM, or the
 * error reported by unix_mkname(), unix_mknod() or
 * mutex_lock_interruptible().
 *
 * NOTE(review): unix_mknod() runs before u->bindlock is taken and the
 * failure paths only path_put() the result, so a bind that fails after the
 * node was created looks like it leaves that node in the filesystem -
 * confirm and consider unlinking it.
 */
993 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
994 {
995 	struct sock *sk = sock->sk;
996 	struct net *net = sock_net(sk);
997 	struct unix_sock *u = unix_sk(sk);
998 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
999 	char *sun_path = sunaddr->sun_path;
1000 	int err;
1001 	unsigned int hash;
1002 	struct unix_address *addr;
1003 	struct hlist_head *list;
1004 	struct path path = { };
1005 
	/* Need at least a complete sun_family, and it must be AF_UNIX. */
1006 	err = -EINVAL;
1007 	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1008 	    sunaddr->sun_family != AF_UNIX)
1009 		goto out;
1010 
	/* Only the family was supplied: let unix_autobind() pick the name. */
1011 	if (addr_len == sizeof(short)) {
1012 		err = unix_autobind(sock);
1013 		goto out;
1014 	}
1015 
	/* A non-negative result is the address length to store from here on. */
1016 	err = unix_mkname(sunaddr, addr_len, &hash);
1017 	if (err < 0)
1018 		goto out;
1019 	addr_len = err;
1020 
	/*
	 * Non-empty sun_path: create the socket inode.  Its permission bits
	 * come from the socket inode's mode with the current umask removed.
	 */
1021 	if (sun_path[0]) {
1022 		umode_t mode = S_IFSOCK |
1023 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1024 		err = unix_mknod(sun_path, mode, &path);
1025 		if (err) {
1026 			if (err == -EEXIST)
1027 				err = -EADDRINUSE;
1028 			goto out;
1029 		}
1030 	}
1031 
1032 	err = mutex_lock_interruptible(&u->bindlock);
1033 	if (err)
1034 		goto out_put;
1035 
	/* A socket that already has an address cannot be bound again. */
1036 	err = -EINVAL;
1037 	if (u->addr)
1038 		goto out_up;
1039 
1040 	err = -ENOMEM;
1041 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1042 	if (!addr)
1043 		goto out_up;
1044 
1045 	memcpy(addr->name, sunaddr, addr_len);
1046 	addr->len = addr_len;
1047 	addr->hash = hash ^ sk->sk_type;
1048 	refcount_set(&addr->refcnt, 1);
1049 
1050 	if (sun_path[0]) {
		/*
		 * Filesystem socket: addr->hash is overwritten with
		 * UNIX_HASH_SIZE and the table bucket is chosen from the
		 * inode number of the node created above.
		 */
1051 		addr->hash = UNIX_HASH_SIZE;
1052 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1053 		spin_lock(&unix_table_lock);
1054 		u->path = path;
1055 		list = &unix_socket_table[hash];
1056 	} else {
		/* No filesystem node: the name itself must still be unused. */
1057 		spin_lock(&unix_table_lock);
1058 		err = -EADDRINUSE;
1059 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1060 					      sk->sk_type, hash)) {
1061 			unix_release_addr(addr);
1062 			goto out_unlock;
1063 		}
1064 
1065 		list = &unix_socket_table[addr->hash];
1066 	}
1067 
	/*
	 * Move the socket to its new bucket under unix_table_lock.  u->addr
	 * is published with release semantics for the smp_load_acquire()
	 * readers elsewhere in this file.
	 */
1068 	err = 0;
1069 	__unix_remove_socket(sk);
1070 	smp_store_release(&u->addr, addr);
1071 	__unix_insert_socket(list, sk);
1072 
1073 out_unlock:
1074 	spin_unlock(&unix_table_lock);
1075 out_up:
1076 	mutex_unlock(&u->bindlock);
1077 out_put:
	/* On success the path now lives in u->path; only drop it on failure. */
1078 	if (err)
1079 		path_put(&path);
1080 out:
1081 	return err;
1082 }
1083 
/*
 * Take the state lock of sk1 and, when sk2 is a different non-NULL socket,
 * of sk2 as well.  When two locks are needed they are always taken in
 * ascending pointer order, the second one with the _nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	/* Same socket twice, or no second socket: one lock is all we need. */
	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1098 
/*
 * Release the locks taken by unix_state_double_lock(): sk1 always, sk2 only
 * when it is a different, non-NULL socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	/* Only one lock was taken in these cases. */
	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
1108 
/*
 * unix_dgram_connect - set, replace or clear the peer of a datagram socket.
 *
 * With an address family other than AF_UNSPEC the destination is looked up
 * with unix_find_other() and, after the permission checks, becomes
 * unix_peer(sk).  With AF_UNSPEC the peer is set to NULL, which breaks an
 * existing association.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a too short
 * address, -EPERM when unix_may_send() refuses, or the error from
 * unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_may_send().
 */
1109 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1110 			      int alen, int flags)
1111 {
1112 	struct sock *sk = sock->sk;
1113 	struct net *net = sock_net(sk);
1114 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115 	struct sock *other;
1116 	unsigned int hash;
1117 	int err;
1118 
1119 	err = -EINVAL;
1120 	if (alen < offsetofend(struct sockaddr, sa_family))
1121 		goto out;
1122 
1123 	if (addr->sa_family != AF_UNSPEC) {
1124 		err = unix_mkname(sunaddr, alen, &hash);
1125 		if (err < 0)
1126 			goto out;
1127 		alen = err;
1128 
		/* SOCK_PASSCRED on a still unbound socket: autobind it first. */
1129 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1130 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1131 			goto out;
1132 
1133 restart:
		/*
		 * Every exit below that does not install other as the peer
		 * drops it again with sock_put().
		 */
1134 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1135 		if (!other)
1136 			goto out;
1137 
1138 		unix_state_double_lock(sk, other);
1139 
1140 		/* Apparently VFS overslept socket death. Retry. */
1141 		if (sock_flag(other, SOCK_DEAD)) {
1142 			unix_state_double_unlock(sk, other);
1143 			sock_put(other);
1144 			goto restart;
1145 		}
1146 
1147 		err = -EPERM;
1148 		if (!unix_may_send(sk, other))
1149 			goto out_unlock;
1150 
1151 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1152 		if (err)
1153 			goto out_unlock;
1154 
1155 	} else {
1156 		/*
1157 		 *	1003.1g breaking connected state with AF_UNSPEC
1158 		 */
1159 		other = NULL;
		/* With other == NULL this locks sk only. */
1160 		unix_state_double_lock(sk, other);
1161 	}
1162 
1163 	/*
1164 	 * If it was connected, reconnect.
1165 	 */
1166 	if (unix_peer(sk)) {
1167 		struct sock *old_peer = unix_peer(sk);
1168 		unix_peer(sk) = other;
1169 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1170 
1171 		unix_state_double_unlock(sk, other);
1172 
		/*
		 * The old peer is notified and released only after both
		 * state locks have been dropped.
		 */
1173 		if (other != old_peer)
1174 			unix_dgram_disconnected(sk, old_peer);
1175 		sock_put(old_peer);
1176 	} else {
1177 		unix_peer(sk) = other;
1178 		unix_state_double_unlock(sk, other);
1179 	}
1180 	return 0;
1181 
1182 out_unlock:
1183 	unix_state_double_unlock(sk, other);
1184 	sock_put(other);
1185 out:
1186 	return err;
1187 }
1188 
1189 static long unix_wait_for_peer(struct sock *other, long timeo)
1190 {
1191 	struct unix_sock *u = unix_sk(other);
1192 	int sched;
1193 	DEFINE_WAIT(wait);
1194 
1195 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1196 
1197 	sched = !sock_flag(other, SOCK_DEAD) &&
1198 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1199 		unix_recvq_full(other);
1200 
1201 	unix_state_unlock(other);
1202 
1203 	if (sched)
1204 		timeo = schedule_timeout(timeo);
1205 
1206 	finish_wait(&u->peer_wait, &wait);
1207 	return timeo;
1208 }
1209 
/*
 * unix_stream_connect - connect sk to a listening AF_UNIX socket.
 *
 * A new sock (newsk), which becomes the far end of the connection, and a
 * one byte skb used to hand it to the listener are allocated up front.
 * Once the listener has been found and both state locks are held, sk and
 * newsk are made peers of each other and the skb is queued on the
 * listener's receive queue, from where unix_accept() dequeues it.
 *
 * Returns 0 on success or a negative errno: -ENOMEM, -ECONNREFUSED,
 * -EAGAIN (listener queue full and no timeout), -EISCONN, -EINVAL, the
 * value of sock_intr_errno() when interrupted by a signal, or the error
 * from unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_stream_connect().
 */
1210 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1211 			       int addr_len, int flags)
1212 {
1213 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1214 	struct sock *sk = sock->sk;
1215 	struct net *net = sock_net(sk);
1216 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1217 	struct sock *newsk = NULL;
1218 	struct sock *other = NULL;
1219 	struct sk_buff *skb = NULL;
1220 	unsigned int hash;
1221 	int st;
1222 	int err;
1223 	long timeo;
1224 
1225 	err = unix_mkname(sunaddr, addr_len, &hash);
1226 	if (err < 0)
1227 		goto out;
1228 	addr_len = err;
1229 
	/* SOCK_PASSCRED on a still unbound socket: autobind it first. */
1230 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1231 	    (err = unix_autobind(sock)) != 0)
1232 		goto out;
1233 
1234 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1235 
1236 	/* First of all allocate resources.
1237 	   If we will make it after state is locked,
1238 	   we will have to recheck all again in any case.
1239 	 */
1240 
1241 	err = -ENOMEM;
1242 
1243 	/* create new sock for complete connection */
1244 	newsk = unix_create1(sock_net(sk), NULL, 0);
1245 	if (newsk == NULL)
1246 		goto out;
1247 
1248 	/* Allocate skb for sending to listening sock */
1249 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1250 	if (skb == NULL)
1251 		goto out;
1252 
1253 restart:
1254 	/*  Find listening sock. */
1255 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1256 	if (!other)
1257 		goto out;
1258 
1259 	/* Latch state of peer */
1260 	unix_state_lock(other);
1261 
1262 	/* Apparently VFS overslept socket death. Retry. */
1263 	if (sock_flag(other, SOCK_DEAD)) {
1264 		unix_state_unlock(other);
1265 		sock_put(other);
1266 		goto restart;
1267 	}
1268 
1269 	err = -ECONNREFUSED;
1270 	if (other->sk_state != TCP_LISTEN)
1271 		goto out_unlock;
1272 	if (other->sk_shutdown & RCV_SHUTDOWN)
1273 		goto out_unlock;
1274 
1275 	if (unix_recvq_full(other)) {
1276 		err = -EAGAIN;
1277 		if (!timeo)
1278 			goto out_unlock;
1279 
		/* unix_wait_for_peer() releases other's state lock. */
1280 		timeo = unix_wait_for_peer(other, timeo);
1281 
1282 		err = sock_intr_errno(timeo);
1283 		if (signal_pending(current))
1284 			goto out;
1285 		sock_put(other);
1286 		goto restart;
1287 	}
1288 
1289 	/* Latch our state.
1290 
1291 	   It is tricky place. We need to grab our state lock and cannot
1292 	   drop lock on peer. It is dangerous because deadlock is
1293 	   possible. Connect to self case and simultaneous
1294 	   attempt to connect are eliminated by checking socket
1295 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1296 	   check this before attempt to grab lock.
1297 
1298 	   Well, and we have to recheck the state after socket locked.
1299 	 */
1300 	st = sk->sk_state;
1301 
1302 	switch (st) {
1303 	case TCP_CLOSE:
1304 		/* This is ok... continue with connect */
1305 		break;
1306 	case TCP_ESTABLISHED:
1307 		/* Socket is already connected */
1308 		err = -EISCONN;
1309 		goto out_unlock;
1310 	default:
1311 		err = -EINVAL;
1312 		goto out_unlock;
1313 	}
1314 
1315 	unix_state_lock_nested(sk);
1316 
	/* sk's state changed between the unlocked read and the lock: retry. */
1317 	if (sk->sk_state != st) {
1318 		unix_state_unlock(sk);
1319 		unix_state_unlock(other);
1320 		sock_put(other);
1321 		goto restart;
1322 	}
1323 
1324 	err = security_unix_stream_connect(sk, other, newsk);
1325 	if (err) {
1326 		unix_state_unlock(sk);
1327 		goto out_unlock;
1328 	}
1329 
1330 	/* The way is open! Fastly set all the necessary fields... */
1331 
	/* newsk's peer pointer holds a reference on sk. */
1332 	sock_hold(sk);
1333 	unix_peer(newsk)	= sk;
1334 	newsk->sk_state		= TCP_ESTABLISHED;
1335 	newsk->sk_type		= sk->sk_type;
1336 	init_peercred(newsk);
1337 	newu = unix_sk(newsk);
1338 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1339 	otheru = unix_sk(other);
1340 
1341 	/* copy address information from listening to new sock
1342 	 *
1343 	 * The contents of *(otheru->addr) and otheru->path
1344 	 * are seen fully set up here, since we have found
1345 	 * otheru in hash under unix_table_lock.  Insertion
1346 	 * into the hash chain we'd found it in had been done
1347 	 * in an earlier critical area protected by unix_table_lock,
1348 	 * the same one where we'd set *(otheru->addr) contents,
1349 	 * as well as otheru->path and otheru->addr itself.
1350 	 *
1351 	 * Using smp_store_release() here to set newu->addr
1352 	 * is enough to make those stores, as well as stores
1353 	 * to newu->path visible to anyone who gets newu->addr
1354 	 * by smp_load_acquire().  IOW, the same warranties
1355 	 * as for unix_sock instances bound in unix_bind() or
1356 	 * in unix_autobind().
1357 	 */
1358 	if (otheru->path.dentry) {
1359 		path_get(&otheru->path);
1360 		newu->path = otheru->path;
1361 	}
1362 	refcount_inc(&otheru->addr->refcnt);
1363 	smp_store_release(&newu->addr, otheru->addr);
1364 
1365 	/* Set credentials */
1366 	copy_peercred(sk, other);
1367 
1368 	sock->state	= SS_CONNECTED;
1369 	sk->sk_state	= TCP_ESTABLISHED;
	/* sk's peer pointer holds a reference on newsk. */
1370 	sock_hold(newsk);
1371 
1372 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1373 	unix_peer(sk)	= newsk;
1374 
1375 	unix_state_unlock(sk);
1376 
1377 	/* take ten and send info to listening sock */
1378 	spin_lock(&other->sk_receive_queue.lock);
1379 	__skb_queue_tail(&other->sk_receive_queue, skb);
1380 	spin_unlock(&other->sk_receive_queue.lock);
1381 	unix_state_unlock(other);
1382 	other->sk_data_ready(other);
1383 	sock_put(other);
1384 	return 0;
1385 
1386 out_unlock:
1387 	if (other)
1388 		unix_state_unlock(other);
1389 
1390 out:
	/* Shared cleanup: skb, newsk and other may each still be NULL here. */
1391 	kfree_skb(skb);
1392 	if (newsk)
1393 		unix_release_sock(newsk, 0);
1394 	if (other)
1395 		sock_put(other);
1396 	return err;
1397 }
1398 
1399 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1400 {
1401 	struct sock *ska = socka->sk, *skb = sockb->sk;
1402 
1403 	/* Join our sockets back to back */
1404 	sock_hold(ska);
1405 	sock_hold(skb);
1406 	unix_peer(ska) = skb;
1407 	unix_peer(skb) = ska;
1408 	init_peercred(ska);
1409 	init_peercred(skb);
1410 
1411 	if (ska->sk_type != SOCK_DGRAM) {
1412 		ska->sk_state = TCP_ESTABLISHED;
1413 		skb->sk_state = TCP_ESTABLISHED;
1414 		socka->state  = SS_CONNECTED;
1415 		sockb->state  = SS_CONNECTED;
1416 	}
1417 	return 0;
1418 }
1419 
1420 static void unix_sock_inherit_flags(const struct socket *old,
1421 				    struct socket *new)
1422 {
1423 	if (test_bit(SOCK_PASSCRED, &old->flags))
1424 		set_bit(SOCK_PASSCRED, &new->flags);
1425 	if (test_bit(SOCK_PASSSEC, &old->flags))
1426 		set_bit(SOCK_PASSSEC, &new->flags);
1427 }
1428 
1429 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1430 		       bool kern)
1431 {
1432 	struct sock *sk = sock->sk;
1433 	struct sock *tsk;
1434 	struct sk_buff *skb;
1435 	int err;
1436 
1437 	err = -EOPNOTSUPP;
1438 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1439 		goto out;
1440 
1441 	err = -EINVAL;
1442 	if (sk->sk_state != TCP_LISTEN)
1443 		goto out;
1444 
1445 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1446 	 * so that no locks are necessary.
1447 	 */
1448 
1449 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1450 	if (!skb) {
1451 		/* This means receive shutdown. */
1452 		if (err == 0)
1453 			err = -EINVAL;
1454 		goto out;
1455 	}
1456 
1457 	tsk = skb->sk;
1458 	skb_free_datagram(sk, skb);
1459 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1460 
1461 	/* attach accepted sock to socket */
1462 	unix_state_lock(tsk);
1463 	newsock->state = SS_CONNECTED;
1464 	unix_sock_inherit_flags(sock, newsock);
1465 	sock_graft(tsk, newsock);
1466 	unix_state_unlock(tsk);
1467 	return 0;
1468 
1469 out:
1470 	return err;
1471 }
1472 
1473 
1474 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1475 {
1476 	struct sock *sk = sock->sk;
1477 	struct unix_address *addr;
1478 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1479 	int err = 0;
1480 
1481 	if (peer) {
1482 		sk = unix_peer_get(sk);
1483 
1484 		err = -ENOTCONN;
1485 		if (!sk)
1486 			goto out;
1487 		err = 0;
1488 	} else {
1489 		sock_hold(sk);
1490 	}
1491 
1492 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1493 	if (!addr) {
1494 		sunaddr->sun_family = AF_UNIX;
1495 		sunaddr->sun_path[0] = 0;
1496 		err = sizeof(short);
1497 	} else {
1498 		err = addr->len;
1499 		memcpy(sunaddr, addr->name, addr->len);
1500 	}
1501 	sock_put(sk);
1502 out:
1503 	return err;
1504 }
1505 
1506 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1507 {
1508 	int err = 0;
1509 
1510 	UNIXCB(skb).pid  = get_pid(scm->pid);
1511 	UNIXCB(skb).uid = scm->creds.uid;
1512 	UNIXCB(skb).gid = scm->creds.gid;
1513 	UNIXCB(skb).fp = NULL;
1514 	unix_get_secdata(scm, skb);
1515 	if (scm->fp && send_fds)
1516 		err = unix_attach_fds(scm, skb);
1517 
1518 	skb->destructor = unix_destruct_scm;
1519 	return err;
1520 }
1521 
1522 static bool unix_passcred_enabled(const struct socket *sock,
1523 				  const struct sock *other)
1524 {
1525 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1526 	       !other->sk_socket ||
1527 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1528 }
1529 
1530 /*
1531  * Some apps rely on write() giving SCM_CREDENTIALS
1532  * We include credentials if source or destination socket
1533  * asserted SOCK_PASSCRED.
1534  */
1535 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1536 			    const struct sock *other)
1537 {
1538 	if (UNIXCB(skb).pid)
1539 		return;
1540 	if (unix_passcred_enabled(sock, other)) {
1541 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1542 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1543 	}
1544 }
1545 
1546 static int maybe_init_creds(struct scm_cookie *scm,
1547 			    struct socket *socket,
1548 			    const struct sock *other)
1549 {
1550 	int err;
1551 	struct msghdr msg = { .msg_controllen = 0 };
1552 
1553 	err = scm_send(socket, &msg, scm, false);
1554 	if (err)
1555 		return err;
1556 
1557 	if (unix_passcred_enabled(socket, other)) {
1558 		scm->pid = get_pid(task_tgid(current));
1559 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1560 	}
1561 	return err;
1562 }
1563 
1564 static bool unix_skb_scm_eq(struct sk_buff *skb,
1565 			    struct scm_cookie *scm)
1566 {
1567 	const struct unix_skb_parms *u = &UNIXCB(skb);
1568 
1569 	return u->pid == scm->pid &&
1570 	       uid_eq(u->uid, scm->creds.uid) &&
1571 	       gid_eq(u->gid, scm->creds.gid) &&
1572 	       unix_secdata_eq(scm, skb);
1573 }
1574 
1575 /*
1576  *	Send AF_UNIX data.
1577  */
1578 
/*
 * unix_dgram_sendmsg - send one datagram; unix_seqpacket_sendmsg() also
 * ends up here.
 *
 * The destination is msg->msg_name when msg->msg_namelen is non-zero,
 * otherwise the connected peer.  The skb is built and filled before the
 * receiver is locked.  Delivery then loops through "restart" (look the
 * receiver up again) and "restart_locked" (re-check with sk's state lock
 * held as well) until the skb is queued or an error is decided.
 *
 * Returns len on success, and also when sk_filter() rejects the skb.
 * Otherwise a negative errno: -EOPNOTSUPP for MSG_OOB, -ENOTCONN,
 * -EMSGSIZE, -ECONNRESET, -ECONNREFUSED, -EPERM, -EPIPE, -EAGAIN, the
 * value of sock_intr_errno() when interrupted, or an error from the
 * helpers called below.
 */
1579 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1580 			      size_t len)
1581 {
1582 	struct sock *sk = sock->sk;
1583 	struct net *net = sock_net(sk);
1584 	struct unix_sock *u = unix_sk(sk);
1585 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1586 	struct sock *other = NULL;
1587 	int namelen = 0; /* fake GCC */
1588 	int err;
1589 	unsigned int hash;
1590 	struct sk_buff *skb;
1591 	long timeo;
1592 	struct scm_cookie scm;
1593 	int data_len = 0;
1594 	int sk_locked;
1595 
1596 	wait_for_unix_gc();
1597 	err = scm_send(sock, msg, &scm, false);
1598 	if (err < 0)
1599 		return err;
1600 
	/* From here on every exit must go through scm_destroy(). */
1601 	err = -EOPNOTSUPP;
1602 	if (msg->msg_flags&MSG_OOB)
1603 		goto out;
1604 
1605 	if (msg->msg_namelen) {
1606 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1607 		if (err < 0)
1608 			goto out;
1609 		namelen = err;
1610 	} else {
		/* No address given: use the connected peer. */
1611 		sunaddr = NULL;
1612 		err = -ENOTCONN;
1613 		other = unix_peer_get(sk);
1614 		if (!other)
1615 			goto out;
1616 	}
1617 
1618 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1619 	    && (err = unix_autobind(sock)) != 0)
1620 		goto out;
1621 
1622 	err = -EMSGSIZE;
1623 	if (len > sk->sk_sndbuf - 32)
1624 		goto out;
1625 
	/*
	 * Anything beyond SKB_MAX_ALLOC goes into page fragments, capped at
	 * MAX_SKB_FRAGS pages and rounded up to whole pages.
	 */
1626 	if (len > SKB_MAX_ALLOC) {
1627 		data_len = min_t(size_t,
1628 				 len - SKB_MAX_ALLOC,
1629 				 MAX_SKB_FRAGS * PAGE_SIZE);
1630 		data_len = PAGE_ALIGN(data_len);
1631 
1632 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1633 	}
1634 
1635 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1636 				   msg->msg_flags & MSG_DONTWAIT, &err,
1637 				   PAGE_ALLOC_COSTLY_ORDER);
1638 	if (skb == NULL)
1639 		goto out;
1640 
1641 	err = unix_scm_to_skb(&scm, skb, true);
1642 	if (err < 0)
1643 		goto out_free;
1644 
1645 	skb_put(skb, len - data_len);
1646 	skb->data_len = data_len;
1647 	skb->len = len;
1648 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1649 	if (err)
1650 		goto out_free;
1651 
1652 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1653 
1654 restart:
1655 	if (!other) {
		/* Without an explicit address there is nobody left to look up. */
1656 		err = -ECONNRESET;
1657 		if (sunaddr == NULL)
1658 			goto out_free;
1659 
1660 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1661 					hash, &err);
1662 		if (other == NULL)
1663 			goto out_free;
1664 	}
1665 
1666 	if (sk_filter(other, skb) < 0) {
1667 		/* Toss the packet but do not return any error to the sender */
1668 		err = len;
1669 		goto out_free;
1670 	}
1671 
	/* sk_locked records whether sk's own state lock is held as well. */
1672 	sk_locked = 0;
1673 	unix_state_lock(other);
1674 restart_locked:
1675 	err = -EPERM;
1676 	if (!unix_may_send(sk, other))
1677 		goto out_unlock;
1678 
1679 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1680 		/*
1681 		 *	Check with 1003.1g - what should
1682 		 *	datagram error
1683 		 */
1684 		unix_state_unlock(other);
1685 		sock_put(other);
1686 
1687 		if (!sk_locked)
1688 			unix_state_lock(sk);
1689 
		/*
		 * A dead connected peer is disconnected and reported as
		 * -ECONNREFUSED; otherwise the destination is looked up
		 * again.
		 */
1690 		err = 0;
1691 		if (unix_peer(sk) == other) {
1692 			unix_peer(sk) = NULL;
1693 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1694 
1695 			unix_state_unlock(sk);
1696 
1697 			unix_dgram_disconnected(sk, other);
1698 			sock_put(other);
1699 			err = -ECONNREFUSED;
1700 		} else {
1701 			unix_state_unlock(sk);
1702 		}
1703 
1704 		other = NULL;
1705 		if (err)
1706 			goto out_free;
1707 		goto restart;
1708 	}
1709 
1710 	err = -EPIPE;
1711 	if (other->sk_shutdown & RCV_SHUTDOWN)
1712 		goto out_unlock;
1713 
1714 	if (sk->sk_type != SOCK_SEQPACKET) {
1715 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1716 		if (err)
1717 			goto out_unlock;
1718 	}
1719 
1720 	/* other == sk && unix_peer(other) != sk if
1721 	 * - unix_peer(sk) == NULL, destination address bound to sk
1722 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1723 	 */
1724 	if (other != sk &&
1725 	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1726 		if (timeo) {
			/* unix_wait_for_peer() releases other's state lock. */
1727 			timeo = unix_wait_for_peer(other, timeo);
1728 
1729 			err = sock_intr_errno(timeo);
1730 			if (signal_pending(current))
1731 				goto out_free;
1732 
1733 			goto restart;
1734 		}
1735 
		/*
		 * Non-blocking case: both state locks are needed, so drop
		 * other's lock and take the pair via
		 * unix_state_double_lock().
		 */
1736 		if (!sk_locked) {
1737 			unix_state_unlock(other);
1738 			unix_state_double_lock(sk, other);
1739 		}
1740 
1741 		if (unix_peer(sk) != other ||
1742 		    unix_dgram_peer_wake_me(sk, other)) {
1743 			err = -EAGAIN;
1744 			sk_locked = 1;
1745 			goto out_unlock;
1746 		}
1747 
		/*
		 * other's lock was dropped above, so repeat the checks
		 * once with both locks held.
		 */
1748 		if (!sk_locked) {
1749 			sk_locked = 1;
1750 			goto restart_locked;
1751 		}
1752 	}
1753 
1754 	if (unlikely(sk_locked))
1755 		unix_state_unlock(sk);
1756 
1757 	if (sock_flag(other, SOCK_RCVTSTAMP))
1758 		__net_timestamp(skb);
1759 	maybe_add_creds(skb, sock, other);
1760 	skb_queue_tail(&other->sk_receive_queue, skb);
1761 	unix_state_unlock(other);
1762 	other->sk_data_ready(other);
1763 	sock_put(other);
1764 	scm_destroy(&scm);
1765 	return len;
1766 
1767 out_unlock:
1768 	if (sk_locked)
1769 		unix_state_unlock(sk);
1770 	unix_state_unlock(other);
1771 out_free:
1772 	kfree_skb(skb);
1773 out:
1774 	if (other)
1775 		sock_put(other);
1776 	scm_destroy(&scm);
1777 	return err;
1778 }
1779 
1780 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1781  * bytes, and a minimum of a full page.
 *
 * unix_stream_sendmsg() caps each skb at SKB_MAX_HEAD(0) plus this many
 * bytes and passes get_order() of this value as the maximum page order to
 * sock_alloc_send_pskb().
1782  */
1783 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1784 
/*
 * unix_stream_sendmsg - send data on a connected stream socket.
 *
 * The payload is split into skbs limited both by half of the send buffer
 * and by SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ; each one is queued directly
 * on the peer's receive queue.  Passed files travel with the first skb
 * only.
 *
 * Returns the number of bytes queued if that is non-zero, even when a
 * later chunk failed.  Otherwise a negative errno: -EOPNOTSUPP for MSG_OOB
 * or for an address on an unconnected socket, -EISCONN for an address on
 * an established socket, -ENOTCONN, -EPIPE (with SIGPIPE unless
 * MSG_NOSIGNAL is set), or an allocation/copy error.
 */
1785 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1786 			       size_t len)
1787 {
1788 	struct sock *sk = sock->sk;
1789 	struct sock *other = NULL;
1790 	int err, size;
1791 	struct sk_buff *skb;
1792 	int sent = 0;
1793 	struct scm_cookie scm;
1794 	bool fds_sent = false;
1795 	int data_len;
1796 
1797 	wait_for_unix_gc();
1798 	err = scm_send(sock, msg, &scm, false);
1799 	if (err < 0)
1800 		return err;
1801 
1802 	err = -EOPNOTSUPP;
1803 	if (msg->msg_flags&MSG_OOB)
1804 		goto out_err;
1805 
	/* A destination address is never accepted on a stream socket. */
1806 	if (msg->msg_namelen) {
1807 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1808 		goto out_err;
1809 	} else {
1810 		err = -ENOTCONN;
1811 		other = unix_peer(sk);
1812 		if (!other)
1813 			goto out_err;
1814 	}
1815 
1816 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1817 		goto pipe_err;
1818 
1819 	while (sent < len) {
1820 		size = len - sent;
1821 
1822 		/* Keep two messages in the pipe so it schedules better */
1823 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1824 
1825 		/* allow fallback to order-0 allocations */
1826 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1827 
		/*
		 * Whatever does not fit into the linear head goes into
		 * pages, rounded up to whole pages but never more than
		 * size.
		 */
1828 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1829 
1830 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1831 
1832 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1833 					   msg->msg_flags & MSG_DONTWAIT, &err,
1834 					   get_order(UNIX_SKB_FRAGS_SZ));
1835 		if (!skb)
1836 			goto out_err;
1837 
1838 		/* Only send the fds in the first buffer */
1839 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1840 		if (err < 0) {
1841 			kfree_skb(skb);
1842 			goto out_err;
1843 		}
1844 		fds_sent = true;
1845 
1846 		skb_put(skb, size - data_len);
1847 		skb->data_len = data_len;
1848 		skb->len = size;
1849 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1850 		if (err) {
1851 			kfree_skb(skb);
1852 			goto out_err;
1853 		}
1854 
1855 		unix_state_lock(other);
1856 
		/* The peer is checked again for every chunk, under its lock. */
1857 		if (sock_flag(other, SOCK_DEAD) ||
1858 		    (other->sk_shutdown & RCV_SHUTDOWN))
1859 			goto pipe_err_free;
1860 
1861 		maybe_add_creds(skb, sock, other);
1862 		skb_queue_tail(&other->sk_receive_queue, skb);
1863 		unix_state_unlock(other);
1864 		other->sk_data_ready(other);
1865 		sent += size;
1866 	}
1867 
1868 	scm_destroy(&scm);
1869 
1870 	return sent;
1871 
1872 pipe_err_free:
1873 	unix_state_unlock(other);
1874 	kfree_skb(skb);
1875 pipe_err:
	/* SIGPIPE is raised only if nothing at all was sent. */
1876 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1877 		send_sig(SIGPIPE, current, 0);
1878 	err = -EPIPE;
1879 out_err:
1880 	scm_destroy(&scm);
	/* A partial send wins over the error. */
1881 	return sent ? : err;
1882 }
1883 
1884 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1885 				    int offset, size_t size, int flags)
1886 {
1887 	int err;
1888 	bool send_sigpipe = false;
1889 	bool init_scm = true;
1890 	struct scm_cookie scm;
1891 	struct sock *other, *sk = socket->sk;
1892 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1893 
1894 	if (flags & MSG_OOB)
1895 		return -EOPNOTSUPP;
1896 
1897 	other = unix_peer(sk);
1898 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1899 		return -ENOTCONN;
1900 
1901 	if (false) {
1902 alloc_skb:
1903 		unix_state_unlock(other);
1904 		mutex_unlock(&unix_sk(other)->iolock);
1905 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1906 					      &err, 0);
1907 		if (!newskb)
1908 			goto err;
1909 	}
1910 
1911 	/* we must acquire iolock as we modify already present
1912 	 * skbs in the sk_receive_queue and mess with skb->len
1913 	 */
1914 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1915 	if (err) {
1916 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1917 		goto err;
1918 	}
1919 
1920 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1921 		err = -EPIPE;
1922 		send_sigpipe = true;
1923 		goto err_unlock;
1924 	}
1925 
1926 	unix_state_lock(other);
1927 
1928 	if (sock_flag(other, SOCK_DEAD) ||
1929 	    other->sk_shutdown & RCV_SHUTDOWN) {
1930 		err = -EPIPE;
1931 		send_sigpipe = true;
1932 		goto err_state_unlock;
1933 	}
1934 
1935 	if (init_scm) {
1936 		err = maybe_init_creds(&scm, socket, other);
1937 		if (err)
1938 			goto err_state_unlock;
1939 		init_scm = false;
1940 	}
1941 
1942 	skb = skb_peek_tail(&other->sk_receive_queue);
1943 	if (tail && tail == skb) {
1944 		skb = newskb;
1945 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1946 		if (newskb) {
1947 			skb = newskb;
1948 		} else {
1949 			tail = skb;
1950 			goto alloc_skb;
1951 		}
1952 	} else if (newskb) {
1953 		/* this is fast path, we don't necessarily need to
1954 		 * call to kfree_skb even though with newskb == NULL
1955 		 * this - does no harm
1956 		 */
1957 		consume_skb(newskb);
1958 		newskb = NULL;
1959 	}
1960 
1961 	if (skb_append_pagefrags(skb, page, offset, size)) {
1962 		tail = skb;
1963 		goto alloc_skb;
1964 	}
1965 
1966 	skb->len += size;
1967 	skb->data_len += size;
1968 	skb->truesize += size;
1969 	refcount_add(size, &sk->sk_wmem_alloc);
1970 
1971 	if (newskb) {
1972 		err = unix_scm_to_skb(&scm, skb, false);
1973 		if (err)
1974 			goto err_state_unlock;
1975 		spin_lock(&other->sk_receive_queue.lock);
1976 		__skb_queue_tail(&other->sk_receive_queue, newskb);
1977 		spin_unlock(&other->sk_receive_queue.lock);
1978 	}
1979 
1980 	unix_state_unlock(other);
1981 	mutex_unlock(&unix_sk(other)->iolock);
1982 
1983 	other->sk_data_ready(other);
1984 	scm_destroy(&scm);
1985 	return size;
1986 
1987 err_state_unlock:
1988 	unix_state_unlock(other);
1989 err_unlock:
1990 	mutex_unlock(&unix_sk(other)->iolock);
1991 err:
1992 	kfree_skb(newskb);
1993 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1994 		send_sig(SIGPIPE, current, 0);
1995 	if (!init_scm)
1996 		scm_destroy(&scm);
1997 	return err;
1998 }
1999 
2000 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2001 				  size_t len)
2002 {
2003 	int err;
2004 	struct sock *sk = sock->sk;
2005 
2006 	err = sock_error(sk);
2007 	if (err)
2008 		return err;
2009 
2010 	if (sk->sk_state != TCP_ESTABLISHED)
2011 		return -ENOTCONN;
2012 
2013 	if (msg->msg_namelen)
2014 		msg->msg_namelen = 0;
2015 
2016 	return unix_dgram_sendmsg(sock, msg, len);
2017 }
2018 
2019 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2020 				  size_t size, int flags)
2021 {
2022 	struct sock *sk = sock->sk;
2023 
2024 	if (sk->sk_state != TCP_ESTABLISHED)
2025 		return -ENOTCONN;
2026 
2027 	return unix_dgram_recvmsg(sock, msg, size, flags);
2028 }
2029 
2030 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2031 {
2032 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2033 
2034 	if (addr) {
2035 		msg->msg_namelen = addr->len;
2036 		memcpy(msg->msg_name, addr->name, addr->len);
2037 	}
2038 }
2039 
/*
 * unix_dgram_recvmsg - receive one datagram; unix_seqpacket_recvmsg() also
 * ends up here.
 *
 * Waits, subject to the receive timeout, for an skb on sk's receive queue,
 * copies up to size bytes starting at the current peek offset into msg and
 * passes the sender's credentials and any attached files to scm_recv().
 * MSG_TRUNC is set in msg->msg_flags when the datagram did not fit.
 *
 * Returns the number of bytes copied, or the remaining datagram length
 * when the caller passed MSG_TRUNC.  Returns 0 when a SEQPACKET socket
 * with RCV_SHUTDOWN set has nothing queued (the -EAGAIN case), and a
 * negative errno otherwise.
 */
2040 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2041 			      size_t size, int flags)
2042 {
2043 	struct scm_cookie scm;
2044 	struct sock *sk = sock->sk;
2045 	struct unix_sock *u = unix_sk(sk);
2046 	struct sk_buff *skb, *last;
2047 	long timeo;
2048 	int skip;
2049 	int err;
2050 
2051 	err = -EOPNOTSUPP;
2052 	if (flags&MSG_OOB)
2053 		goto out;
2054 
2055 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2056 
	/*
	 * iolock is held only while trying to dequeue, never while
	 * sleeping.  The loop is left with iolock held only if an skb was
	 * obtained.
	 */
2057 	do {
2058 		mutex_lock(&u->iolock);
2059 
2060 		skip = sk_peek_offset(sk, flags);
2061 		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2062 					      &last);
2063 		if (skb)
2064 			break;
2065 
2066 		mutex_unlock(&u->iolock);
2067 
2068 		if (err != -EAGAIN)
2069 			break;
2070 	} while (timeo &&
2071 		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2072 
2073 	if (!skb) { /* implies iolock unlocked */
2074 		unix_state_lock(sk);
2075 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2076 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2077 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2078 			err = 0;
2079 		unix_state_unlock(sk);
2080 		goto out;
2081 	}
2082 
	/* Wake tasks sleeping on our peer_wait queue, if there are any. */
2083 	if (wq_has_sleeper(&u->peer_wait))
2084 		wake_up_interruptible_sync_poll(&u->peer_wait,
2085 						EPOLLOUT | EPOLLWRNORM |
2086 						EPOLLWRBAND);
2087 
2088 	if (msg->msg_name)
2089 		unix_copy_addr(msg, skb->sk);
2090 
	/* Clamp to what is left after the peek offset; flag truncation. */
2091 	if (size > skb->len - skip)
2092 		size = skb->len - skip;
2093 	else if (size < skb->len - skip)
2094 		msg->msg_flags |= MSG_TRUNC;
2095 
2096 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2097 	if (err)
2098 		goto out_free;
2099 
2100 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2101 		__sock_recv_timestamp(msg, sk, skb);
2102 
2103 	memset(&scm, 0, sizeof(scm));
2104 
2105 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2106 	unix_set_secdata(&scm, skb);
2107 
2108 	if (!(flags & MSG_PEEK)) {
		/* Real read: the attached files move from the skb into scm. */
2109 		if (UNIXCB(skb).fp)
2110 			unix_detach_fds(&scm, skb);
2111 
2112 		sk_peek_offset_bwd(sk, skb->len);
2113 	} else {
2114 		/* It is questionable: on PEEK we could:
2115 		   - do not return fds - good, but too simple 8)
2116 		   - return fds, and do not return them on read (old strategy,
2117 		     apparently wrong)
2118 		   - clone fds (I chose it for now, it is the most universal
2119 		     solution)
2120 
2121 		   POSIX 1003.1g does not actually define this clearly
2122 		   at all. POSIX 1003.1g doesn't define a lot of things
2123 		   clearly however!
2124 
2125 		*/
2126 
2127 		sk_peek_offset_fwd(sk, size);
2128 
2129 		if (UNIXCB(skb).fp)
2130 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2131 	}
	/* With MSG_TRUNC from the caller, report the real remaining length. */
2132 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2133 
2134 	scm_recv(sock, msg, &scm, flags);
2135 
2136 out_free:
2137 	skb_free_datagram(sk, skb);
2138 	mutex_unlock(&u->iolock);
2139 out:
2140 	return err;
2141 }
2142 
2143 /*
2144  *	Sleep until more data has arrived. But check for races..
2145  */
2146 static long unix_stream_data_wait(struct sock *sk, long timeo,
2147 				  struct sk_buff *last, unsigned int last_len,
2148 				  bool freezable)
2149 {
2150 	struct sk_buff *tail;
2151 	DEFINE_WAIT(wait);
2152 
2153 	unix_state_lock(sk);
2154 
2155 	for (;;) {
2156 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2157 
2158 		tail = skb_peek_tail(&sk->sk_receive_queue);
2159 		if (tail != last ||
2160 		    (tail && tail->len != last_len) ||
2161 		    sk->sk_err ||
2162 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2163 		    signal_pending(current) ||
2164 		    !timeo)
2165 			break;
2166 
2167 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2168 		unix_state_unlock(sk);
2169 		if (freezable)
2170 			timeo = freezable_schedule_timeout(timeo);
2171 		else
2172 			timeo = schedule_timeout(timeo);
2173 		unix_state_lock(sk);
2174 
2175 		if (sock_flag(sk, SOCK_DEAD))
2176 			break;
2177 
2178 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2179 	}
2180 
2181 	finish_wait(sk_sleep(sk), &wait);
2182 	unix_state_unlock(sk);
2183 	return timeo;
2184 }
2185 
2186 static unsigned int unix_skb_len(const struct sk_buff *skb)
2187 {
2188 	return skb->len - UNIXCB(skb).consumed;
2189 }
2190 
/*
 * Arguments for one unix_stream_read_generic() call, bundled into a single
 * structure.
 */
2191 struct unix_stream_read_state {
	/*
	 * Callback taking an skb, two ints and this state.
	 * NOTE(review): its call site is outside this view; the ints are
	 * presumably an offset and a chunk length - confirm.
	 */
2192 	int (*recv_actor)(struct sk_buff *, int, int,
2193 			  struct unix_stream_read_state *);
	/* Socket being read from. */
2194 	struct socket *socket;
	/* NOTE(review): presumably the destination for recvmsg-style reads - confirm. */
2195 	struct msghdr *msg;
	/* NOTE(review): presumably the destination for splice-style reads - confirm. */
2196 	struct pipe_inode_info *pipe;
	/* Number of bytes requested. */
2197 	size_t size;
	/* MSG_* flags of the request (MSG_DONTWAIT, MSG_OOB, MSG_WAITALL, ...). */
2198 	int flags;
	/* NOTE(review): presumably only meaningful together with pipe - confirm. */
2199 	unsigned int splice_flags;
2200 };
2201 
2202 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2203 				    bool freezable)
2204 {
2205 	struct scm_cookie scm;
2206 	struct socket *sock = state->socket;
2207 	struct sock *sk = sock->sk;
2208 	struct unix_sock *u = unix_sk(sk);
2209 	int copied = 0;
2210 	int flags = state->flags;
2211 	int noblock = flags & MSG_DONTWAIT;
2212 	bool check_creds = false;
2213 	int target;
2214 	int err = 0;
2215 	long timeo;
2216 	int skip;
2217 	size_t size = state->size;
2218 	unsigned int last_len;
2219 
2220 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2221 		err = -EINVAL;
2222 		goto out;
2223 	}
2224 
2225 	if (unlikely(flags & MSG_OOB)) {
2226 		err = -EOPNOTSUPP;
2227 		goto out;
2228 	}
2229 
2230 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2231 	timeo = sock_rcvtimeo(sk, noblock);
2232 
2233 	memset(&scm, 0, sizeof(scm));
2234 
2235 	/* Lock the socket to prevent queue disordering
2236 	 * while sleeps in memcpy_tomsg
2237 	 */
2238 	mutex_lock(&u->iolock);
2239 
2240 	skip = max(sk_peek_offset(sk, flags), 0);
2241 
2242 	do {
2243 		int chunk;
2244 		bool drop_skb;
2245 		struct sk_buff *skb, *last;
2246 
2247 redo:
2248 		unix_state_lock(sk);
2249 		if (sock_flag(sk, SOCK_DEAD)) {
2250 			err = -ECONNRESET;
2251 			goto unlock;
2252 		}
2253 		last = skb = skb_peek(&sk->sk_receive_queue);
2254 		last_len = last ? last->len : 0;
2255 again:
2256 		if (skb == NULL) {
2257 			if (copied >= target)
2258 				goto unlock;
2259 
2260 			/*
2261 			 *	POSIX 1003.1g mandates this order.
2262 			 */
2263 
2264 			err = sock_error(sk);
2265 			if (err)
2266 				goto unlock;
2267 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2268 				goto unlock;
2269 
2270 			unix_state_unlock(sk);
2271 			if (!timeo) {
2272 				err = -EAGAIN;
2273 				break;
2274 			}
2275 
2276 			mutex_unlock(&u->iolock);
2277 
2278 			timeo = unix_stream_data_wait(sk, timeo, last,
2279 						      last_len, freezable);
2280 
2281 			if (signal_pending(current)) {
2282 				err = sock_intr_errno(timeo);
2283 				scm_destroy(&scm);
2284 				goto out;
2285 			}
2286 
2287 			mutex_lock(&u->iolock);
2288 			goto redo;
2289 unlock:
2290 			unix_state_unlock(sk);
2291 			break;
2292 		}
2293 
2294 		while (skip >= unix_skb_len(skb)) {
2295 			skip -= unix_skb_len(skb);
2296 			last = skb;
2297 			last_len = skb->len;
2298 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2299 			if (!skb)
2300 				goto again;
2301 		}
2302 
2303 		unix_state_unlock(sk);
2304 
2305 		if (check_creds) {
2306 			/* Never glue messages from different writers */
2307 			if (!unix_skb_scm_eq(skb, &scm))
2308 				break;
2309 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2310 			/* Copy credentials */
2311 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2312 			unix_set_secdata(&scm, skb);
2313 			check_creds = true;
2314 		}
2315 
2316 		/* Copy address just once */
2317 		if (state->msg && state->msg->msg_name) {
2318 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2319 					 state->msg->msg_name);
2320 			unix_copy_addr(state->msg, skb->sk);
2321 			sunaddr = NULL;
2322 		}
2323 
2324 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2325 		skb_get(skb);
2326 		chunk = state->recv_actor(skb, skip, chunk, state);
2327 		drop_skb = !unix_skb_len(skb);
2328 		/* skb is only safe to use if !drop_skb */
2329 		consume_skb(skb);
2330 		if (chunk < 0) {
2331 			if (copied == 0)
2332 				copied = -EFAULT;
2333 			break;
2334 		}
2335 		copied += chunk;
2336 		size -= chunk;
2337 
2338 		if (drop_skb) {
2339 			/* the skb was touched by a concurrent reader;
2340 			 * we should not expect anything from this skb
2341 			 * anymore and assume it invalid - we can be
2342 			 * sure it was dropped from the socket queue
2343 			 *
2344 			 * let's report a short read
2345 			 */
2346 			err = 0;
2347 			break;
2348 		}
2349 
2350 		/* Mark read part of skb as used */
2351 		if (!(flags & MSG_PEEK)) {
2352 			UNIXCB(skb).consumed += chunk;
2353 
2354 			sk_peek_offset_bwd(sk, chunk);
2355 
2356 			if (UNIXCB(skb).fp)
2357 				unix_detach_fds(&scm, skb);
2358 
2359 			if (unix_skb_len(skb))
2360 				break;
2361 
2362 			skb_unlink(skb, &sk->sk_receive_queue);
2363 			consume_skb(skb);
2364 
2365 			if (scm.fp)
2366 				break;
2367 		} else {
2368 			/* It is questionable, see note in unix_dgram_recvmsg.
2369 			 */
2370 			if (UNIXCB(skb).fp)
2371 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2372 
2373 			sk_peek_offset_fwd(sk, chunk);
2374 
2375 			if (UNIXCB(skb).fp)
2376 				break;
2377 
2378 			skip = 0;
2379 			last = skb;
2380 			last_len = skb->len;
2381 			unix_state_lock(sk);
2382 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2383 			if (skb)
2384 				goto again;
2385 			unix_state_unlock(sk);
2386 			break;
2387 		}
2388 	} while (size);
2389 
2390 	mutex_unlock(&u->iolock);
2391 	if (state->msg)
2392 		scm_recv(sock, state->msg, &scm, flags);
2393 	else
2394 		scm_destroy(&scm);
2395 out:
2396 	return copied ? : err;
2397 }
2398 
2399 static int unix_stream_read_actor(struct sk_buff *skb,
2400 				  int skip, int chunk,
2401 				  struct unix_stream_read_state *state)
2402 {
2403 	int ret;
2404 
2405 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2406 				    state->msg, chunk);
2407 	return ret ?: chunk;
2408 }
2409 
2410 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2411 			       size_t size, int flags)
2412 {
2413 	struct unix_stream_read_state state = {
2414 		.recv_actor = unix_stream_read_actor,
2415 		.socket = sock,
2416 		.msg = msg,
2417 		.size = size,
2418 		.flags = flags
2419 	};
2420 
2421 	return unix_stream_read_generic(&state, true);
2422 }
2423 
2424 static int unix_stream_splice_actor(struct sk_buff *skb,
2425 				    int skip, int chunk,
2426 				    struct unix_stream_read_state *state)
2427 {
2428 	return skb_splice_bits(skb, state->socket->sk,
2429 			       UNIXCB(skb).consumed + skip,
2430 			       state->pipe, chunk, state->splice_flags);
2431 }
2432 
2433 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2434 				       struct pipe_inode_info *pipe,
2435 				       size_t size, unsigned int flags)
2436 {
2437 	struct unix_stream_read_state state = {
2438 		.recv_actor = unix_stream_splice_actor,
2439 		.socket = sock,
2440 		.pipe = pipe,
2441 		.size = size,
2442 		.splice_flags = flags,
2443 	};
2444 
2445 	if (unlikely(*ppos))
2446 		return -ESPIPE;
2447 
2448 	if (sock->file->f_flags & O_NONBLOCK ||
2449 	    flags & SPLICE_F_NONBLOCK)
2450 		state.flags = MSG_DONTWAIT;
2451 
2452 	return unix_stream_read_generic(&state, false);
2453 }
2454 
2455 static int unix_shutdown(struct socket *sock, int mode)
2456 {
2457 	struct sock *sk = sock->sk;
2458 	struct sock *other;
2459 
2460 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2461 		return -EINVAL;
2462 	/* This maps:
2463 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2464 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2465 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2466 	 */
2467 	++mode;
2468 
2469 	unix_state_lock(sk);
2470 	sk->sk_shutdown |= mode;
2471 	other = unix_peer(sk);
2472 	if (other)
2473 		sock_hold(other);
2474 	unix_state_unlock(sk);
2475 	sk->sk_state_change(sk);
2476 
2477 	if (other &&
2478 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2479 
2480 		int peer_mode = 0;
2481 
2482 		if (mode&RCV_SHUTDOWN)
2483 			peer_mode |= SEND_SHUTDOWN;
2484 		if (mode&SEND_SHUTDOWN)
2485 			peer_mode |= RCV_SHUTDOWN;
2486 		unix_state_lock(other);
2487 		other->sk_shutdown |= peer_mode;
2488 		unix_state_unlock(other);
2489 		other->sk_state_change(other);
2490 		if (peer_mode == SHUTDOWN_MASK)
2491 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2492 		else if (peer_mode & RCV_SHUTDOWN)
2493 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2494 	}
2495 	if (other)
2496 		sock_put(other);
2497 
2498 	return 0;
2499 }
2500 
2501 long unix_inq_len(struct sock *sk)
2502 {
2503 	struct sk_buff *skb;
2504 	long amount = 0;
2505 
2506 	if (sk->sk_state == TCP_LISTEN)
2507 		return -EINVAL;
2508 
2509 	spin_lock(&sk->sk_receive_queue.lock);
2510 	if (sk->sk_type == SOCK_STREAM ||
2511 	    sk->sk_type == SOCK_SEQPACKET) {
2512 		skb_queue_walk(&sk->sk_receive_queue, skb)
2513 			amount += unix_skb_len(skb);
2514 	} else {
2515 		skb = skb_peek(&sk->sk_receive_queue);
2516 		if (skb)
2517 			amount = skb->len;
2518 	}
2519 	spin_unlock(&sk->sk_receive_queue.lock);
2520 
2521 	return amount;
2522 }
2523 EXPORT_SYMBOL_GPL(unix_inq_len);
2524 
/* Report the socket's write-memory allocation as returned by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2530 
2531 static int unix_open_file(struct sock *sk)
2532 {
2533 	struct path path;
2534 	struct file *f;
2535 	int fd;
2536 
2537 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2538 		return -EPERM;
2539 
2540 	if (!smp_load_acquire(&unix_sk(sk)->addr))
2541 		return -ENOENT;
2542 
2543 	path = unix_sk(sk)->path;
2544 	if (!path.dentry)
2545 		return -ENOENT;
2546 
2547 	path_get(&path);
2548 
2549 	fd = get_unused_fd_flags(O_CLOEXEC);
2550 	if (fd < 0)
2551 		goto out;
2552 
2553 	f = dentry_open(&path, O_PATH, current_cred());
2554 	if (IS_ERR(f)) {
2555 		put_unused_fd(fd);
2556 		fd = PTR_ERR(f);
2557 		goto out;
2558 	}
2559 
2560 	fd_install(fd, f);
2561 out:
2562 	path_put(&path);
2563 
2564 	return fd;
2565 }
2566 
2567 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2568 {
2569 	struct sock *sk = sock->sk;
2570 	long amount = 0;
2571 	int err;
2572 
2573 	switch (cmd) {
2574 	case SIOCOUTQ:
2575 		amount = unix_outq_len(sk);
2576 		err = put_user(amount, (int __user *)arg);
2577 		break;
2578 	case SIOCINQ:
2579 		amount = unix_inq_len(sk);
2580 		if (amount < 0)
2581 			err = amount;
2582 		else
2583 			err = put_user(amount, (int __user *)arg);
2584 		break;
2585 	case SIOCUNIXFILE:
2586 		err = unix_open_file(sk);
2587 		break;
2588 	default:
2589 		err = -ENOIOCTLCMD;
2590 		break;
2591 	}
2592 	return err;
2593 }
2594 
#ifdef CONFIG_COMPAT
/* Compat ioctl: convert @arg with compat_ptr() and defer to unix_ioctl(). */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
#endif
2601 
2602 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2603 {
2604 	struct sock *sk = sock->sk;
2605 	__poll_t mask;
2606 
2607 	sock_poll_wait(file, sock, wait);
2608 	mask = 0;
2609 
2610 	/* exceptional events? */
2611 	if (sk->sk_err)
2612 		mask |= EPOLLERR;
2613 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2614 		mask |= EPOLLHUP;
2615 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2616 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2617 
2618 	/* readable? */
2619 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2620 		mask |= EPOLLIN | EPOLLRDNORM;
2621 
2622 	/* Connection-based need to check for termination and startup */
2623 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2624 	    sk->sk_state == TCP_CLOSE)
2625 		mask |= EPOLLHUP;
2626 
2627 	/*
2628 	 * we set writable also when the other side has shut down the
2629 	 * connection. This prevents stuck sockets.
2630 	 */
2631 	if (unix_writable(sk))
2632 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2633 
2634 	return mask;
2635 }
2636 
2637 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2638 				    poll_table *wait)
2639 {
2640 	struct sock *sk = sock->sk, *other;
2641 	unsigned int writable;
2642 	__poll_t mask;
2643 
2644 	sock_poll_wait(file, sock, wait);
2645 	mask = 0;
2646 
2647 	/* exceptional events? */
2648 	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2649 		mask |= EPOLLERR |
2650 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2651 
2652 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2653 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2654 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2655 		mask |= EPOLLHUP;
2656 
2657 	/* readable? */
2658 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2659 		mask |= EPOLLIN | EPOLLRDNORM;
2660 
2661 	/* Connection-based need to check for termination and startup */
2662 	if (sk->sk_type == SOCK_SEQPACKET) {
2663 		if (sk->sk_state == TCP_CLOSE)
2664 			mask |= EPOLLHUP;
2665 		/* connection hasn't started yet? */
2666 		if (sk->sk_state == TCP_SYN_SENT)
2667 			return mask;
2668 	}
2669 
2670 	/* No write status requested, avoid expensive OUT tests. */
2671 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2672 		return mask;
2673 
2674 	writable = unix_writable(sk);
2675 	if (writable) {
2676 		unix_state_lock(sk);
2677 
2678 		other = unix_peer(sk);
2679 		if (other && unix_peer(other) != sk &&
2680 		    unix_recvq_full(other) &&
2681 		    unix_dgram_peer_wake_me(sk, other))
2682 			writable = 0;
2683 
2684 		unix_state_unlock(sk);
2685 	}
2686 
2687 	if (writable)
2688 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2689 	else
2690 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2691 
2692 	return mask;
2693 }
2694 
2695 #ifdef CONFIG_PROC_FS
2696 
/*
 * A seq_file position packs two values into one long: the hash-table bucket
 * index in the high bits and an offset within that bucket in the low
 * BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2702 
2703 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2704 {
2705 	unsigned long offset = get_offset(*pos);
2706 	unsigned long bucket = get_bucket(*pos);
2707 	struct sock *sk;
2708 	unsigned long count = 0;
2709 
2710 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2711 		if (sock_net(sk) != seq_file_net(seq))
2712 			continue;
2713 		if (++count == offset)
2714 			break;
2715 	}
2716 
2717 	return sk;
2718 }
2719 
2720 static struct sock *unix_next_socket(struct seq_file *seq,
2721 				     struct sock *sk,
2722 				     loff_t *pos)
2723 {
2724 	unsigned long bucket;
2725 
2726 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2727 		sk = sk_next(sk);
2728 		if (!sk)
2729 			goto next_bucket;
2730 		if (sock_net(sk) == seq_file_net(seq))
2731 			return sk;
2732 	}
2733 
2734 	do {
2735 		sk = unix_from_bucket(seq, pos);
2736 		if (sk)
2737 			return sk;
2738 
2739 next_bucket:
2740 		bucket = get_bucket(*pos) + 1;
2741 		*pos = set_bucket_offset(bucket, 1);
2742 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2743 
2744 	return NULL;
2745 }
2746 
2747 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2748 	__acquires(unix_table_lock)
2749 {
2750 	spin_lock(&unix_table_lock);
2751 
2752 	if (!*pos)
2753 		return SEQ_START_TOKEN;
2754 
2755 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2756 		return NULL;
2757 
2758 	return unix_next_socket(seq, NULL, pos);
2759 }
2760 
2761 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2762 {
2763 	++*pos;
2764 	return unix_next_socket(seq, v, pos);
2765 }
2766 
2767 static void unix_seq_stop(struct seq_file *seq, void *v)
2768 	__releases(unix_table_lock)
2769 {
2770 	spin_unlock(&unix_table_lock);
2771 }
2772 
2773 static int unix_seq_show(struct seq_file *seq, void *v)
2774 {
2775 
2776 	if (v == SEQ_START_TOKEN)
2777 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2778 			 "Inode Path\n");
2779 	else {
2780 		struct sock *s = v;
2781 		struct unix_sock *u = unix_sk(s);
2782 		unix_state_lock(s);
2783 
2784 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2785 			s,
2786 			refcount_read(&s->sk_refcnt),
2787 			0,
2788 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2789 			s->sk_type,
2790 			s->sk_socket ?
2791 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2792 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2793 			sock_i_ino(s));
2794 
2795 		if (u->addr) {	// under unix_table_lock here
2796 			int i, len;
2797 			seq_putc(seq, ' ');
2798 
2799 			i = 0;
2800 			len = u->addr->len - sizeof(short);
2801 			if (!UNIX_ABSTRACT(s))
2802 				len--;
2803 			else {
2804 				seq_putc(seq, '@');
2805 				i++;
2806 			}
2807 			for ( ; i < len; i++)
2808 				seq_putc(seq, u->addr->name->sun_path[i] ?:
2809 					 '@');
2810 		}
2811 		unix_state_unlock(s);
2812 		seq_putc(seq, '\n');
2813 	}
2814 
2815 	return 0;
2816 }
2817 
2818 static const struct seq_operations unix_seq_ops = {
2819 	.start  = unix_seq_start,
2820 	.next   = unix_seq_next,
2821 	.stop   = unix_seq_stop,
2822 	.show   = unix_seq_show,
2823 };
2824 #endif
2825 
2826 static const struct net_proto_family unix_family_ops = {
2827 	.family = PF_UNIX,
2828 	.create = unix_create,
2829 	.owner	= THIS_MODULE,
2830 };
2831 
2832 
2833 static int __net_init unix_net_init(struct net *net)
2834 {
2835 	int error = -ENOMEM;
2836 
2837 	net->unx.sysctl_max_dgram_qlen = 10;
2838 	if (unix_sysctl_register(net))
2839 		goto out;
2840 
2841 #ifdef CONFIG_PROC_FS
2842 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2843 			sizeof(struct seq_net_private))) {
2844 		unix_sysctl_unregister(net);
2845 		goto out;
2846 	}
2847 #endif
2848 	error = 0;
2849 out:
2850 	return error;
2851 }
2852 
2853 static void __net_exit unix_net_exit(struct net *net)
2854 {
2855 	unix_sysctl_unregister(net);
2856 	remove_proc_entry("unix", net->proc_net);
2857 }
2858 
2859 static struct pernet_operations unix_net_ops = {
2860 	.init = unix_net_init,
2861 	.exit = unix_net_exit,
2862 };
2863 
2864 static int __init af_unix_init(void)
2865 {
2866 	int rc = -1;
2867 
2868 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2869 
2870 	rc = proto_register(&unix_proto, 1);
2871 	if (rc != 0) {
2872 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2873 		goto out;
2874 	}
2875 
2876 	sock_register(&unix_family_ops);
2877 	register_pernet_subsys(&unix_net_ops);
2878 out:
2879 	return rc;
2880 }
2881 
2882 static void __exit af_unix_exit(void)
2883 {
2884 	sock_unregister(PF_UNIX);
2885 	proto_unregister(&unix_proto);
2886 	unregister_pernet_subsys(&unix_net_ops);
2887 }
2888 
2889 /* Earlier than device_initcall() so that other drivers invoking
2890    request_module() don't end up in a loop when modprobe tries
2891    to use a UNIX socket. But later than subsys_initcall() because
2892    we depend on stuff initialised there */
2893 fs_initcall(af_unix_init);
2894 module_exit(af_unix_exit);
2895 
2896 MODULE_LICENSE("GPL");
2897 MODULE_ALIAS_NETPROTO(PF_UNIX);
2898