xref: /linux/net/unix/af_unix.c (revision 015d239ac0142ad0e26567fd890ef8d171f13709)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * NET4:	Implementation of BSD Unix domain sockets.
4   *
5   * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6   *
7   * Fixes:
8   *		Linus Torvalds	:	Assorted bug cures.
9   *		Niibe Yutaka	:	async I/O support.
10   *		Carsten Paeth	:	PF_UNIX check, address fixes.
11   *		Alan Cox	:	Limit size of allocated blocks.
12   *		Alan Cox	:	Fixed the stupid socketpair bug.
13   *		Alan Cox	:	BSD compatibility fine tuning.
14   *		Alan Cox	:	Fixed a bug in connect when interrupted.
15   *		Alan Cox	:	Sorted out a proper draft version of
16   *					file descriptor passing hacked up from
17   *					Mike Shaver's work.
18   *		Marty Leisner	:	Fixes to fd passing
19   *		Nick Nevin	:	recvmsg bugfix.
20   *		Alan Cox	:	Started proper garbage collector
21   *		Heiko Eißfeldt	:	Missing verify_area check
22   *		Alan Cox	:	Started POSIXisms
23   *		Andreas Schwab	:	Replace inode by dentry for proper
24   *					reference counting
25   *		Kirk Petersen	:	Made this a module
26   *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27   *					Lots of bug fixes.
28   *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29   *					by the above two patches.
30   *	     Andrea Arcangeli	:	If possible we block in connect(2)
31   *					if the max backlog of the listen socket
32   *					has been reached. This won't break
33   *					old apps and it avoids a huge number
34   *					of sockets being hashed (for unix_gc()
35   *					performance reasons).
36   *					Security fix that limits the max
37   *					number of socks to 2*max_files and
38   *					the number of skbs queueable in the
39   *					dgram receiver.
40   *		Artur Skawina   :	Hash function optimizations
41   *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
42   *	      Malcolm Beattie   :	Set peercred for socketpair
43   *	     Michal Ostrowski   :       Module initialization cleanup.
44   *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45   *	     				the core infrastructure is doing that
46   *	     				for all net proto families now (2.5.69+)
47   *
48   * Known differences from reference BSD that was tested:
49   *
50   *	[TO FIX]
51   *	ECONNREFUSED is not returned from one end of a connected socket to the
52   *		other the moment one end closes.
53   *	fstat() doesn't return st_dev=0, give the blksize as the high-water mark,
54   *		or a fake inode identifier (nor does it have the BSD fstat-first-socket-twice bug).
55   *	[NOT TO FIX]
56   *	accept() returns a path name even if the connecting socket has closed
57   *		in the meantime (BSD loses the path and gives up).
58   *	accept() returns 0 length path for an unbound connector. BSD returns 16
59   *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60   *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61   *	BSD af_unix apparently has connect forgetting to block properly.
62   *		(need to check this with the POSIX spec in detail)
63   *
64   * Differences from 2.0.0-11-... (ANK)
65   *	Bug fixes and improvements.
66   *		- client shutdown killed server socket.
67   *		- removed all useless cli/sti pairs.
68   *
69   *	Semantic changes/extensions.
70   *		- generic control message passing.
71   *		- SCM_CREDENTIALS control message.
72   *		- "Abstract" (not FS based) socket bindings.
73   *		  Abstract names are sequences of bytes (not zero terminated)
74   *		  starting with a 0 byte, so that this name space does not intersect
75   *		  with BSD names.
76   */
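/* Editor's sketch (not part of the original header, "mysock" is a
 * made-up name): binding an abstract name from userspace looks
 * roughly like
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	memcpy(sun.sun_path, "\0mysock", 7);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 7);
 *
 * The leading 0 byte keeps abstract names disjoint from filesystem
 * paths such as "/tmp/mysock".
 */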
77  
78  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79  
80  #include <linux/module.h>
81  #include <linux/kernel.h>
82  #include <linux/signal.h>
83  #include <linux/sched/signal.h>
84  #include <linux/errno.h>
85  #include <linux/string.h>
86  #include <linux/stat.h>
87  #include <linux/dcache.h>
88  #include <linux/namei.h>
89  #include <linux/socket.h>
90  #include <linux/un.h>
91  #include <linux/fcntl.h>
92  #include <linux/termios.h>
93  #include <linux/sockios.h>
94  #include <linux/net.h>
95  #include <linux/in.h>
96  #include <linux/fs.h>
97  #include <linux/slab.h>
98  #include <linux/uaccess.h>
99  #include <linux/skbuff.h>
100  #include <linux/netdevice.h>
101  #include <net/net_namespace.h>
102  #include <net/sock.h>
103  #include <net/tcp_states.h>
104  #include <net/af_unix.h>
105  #include <linux/proc_fs.h>
106  #include <linux/seq_file.h>
107  #include <net/scm.h>
108  #include <linux/init.h>
109  #include <linux/poll.h>
110  #include <linux/rtnetlink.h>
111  #include <linux/mount.h>
112  #include <net/checksum.h>
113  #include <linux/security.h>
114  #include <linux/freezer.h>
115  #include <linux/file.h>
116  
117  #include "scm.h"
118  
119  struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120  EXPORT_SYMBOL_GPL(unix_socket_table);
121  DEFINE_SPINLOCK(unix_table_lock);
122  EXPORT_SYMBOL_GPL(unix_table_lock);
123  static atomic_long_t unix_nr_socks;
124  
125  
126  static struct hlist_head *unix_sockets_unbound(void *addr)
127  {
128  	unsigned long hash = (unsigned long)addr;
129  
130  	hash ^= hash >> 16;
131  	hash ^= hash >> 8;
132  	hash %= UNIX_HASH_SIZE;
133  	return &unix_socket_table[UNIX_HASH_SIZE + hash];
134  }
135  
136  #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137  
138  #ifdef CONFIG_SECURITY_NETWORK
139  static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140  {
141  	UNIXCB(skb).secid = scm->secid;
142  }
143  
144  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145  {
146  	scm->secid = UNIXCB(skb).secid;
147  }
148  
149  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150  {
151  	return (scm->secid == UNIXCB(skb).secid);
152  }
153  #else
154  static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155  { }
156  
157  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158  { }
159  
160  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161  {
162  	return true;
163  }
164  #endif /* CONFIG_SECURITY_NETWORK */
165  
166  /*
167   *  SMP locking strategy:
168   *    hash table is protected with spinlock unix_table_lock
169   *    each socket state is protected by separate spin lock.
170   */
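/* Editor's note: when two socket state locks must be held at once,
 * the code takes them in ascending pointer order (see
 * unix_state_double_lock() below), the usual way to avoid ABBA
 * deadlocks between two sockets locked in opposite orders.
 */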
171  
172  static inline unsigned int unix_hash_fold(__wsum n)
173  {
174  	unsigned int hash = (__force unsigned int)csum_fold(n);
175  
176  	hash ^= hash>>8;
177  	return hash&(UNIX_HASH_SIZE-1);
178  }
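/* Editor's worked example: csum_fold() reduces the 32-bit sum to
 * 16 bits; xoring the high byte into the low byte and masking with
 * UNIX_HASH_SIZE-1 yields the table index.  Assuming the usual
 * UNIX_HASH_SIZE of 256, a folded sum of 0x1234 gives
 * 0x34 ^ 0x12 = 0x26 as the bucket.
 */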
179  
180  #define unix_peer(sk) (unix_sk(sk)->peer)
181  
182  static inline int unix_our_peer(struct sock *sk, struct sock *osk)
183  {
184  	return unix_peer(osk) == sk;
185  }
186  
187  static inline int unix_may_send(struct sock *sk, struct sock *osk)
188  {
189  	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190  }
191  
192  static inline int unix_recvq_full(const struct sock *sk)
193  {
194  	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195  }
196  
197  static inline int unix_recvq_full_lockless(const struct sock *sk)
198  {
199  	return skb_queue_len_lockless(&sk->sk_receive_queue) >
200  		READ_ONCE(sk->sk_max_ack_backlog);
201  }
202  
203  struct sock *unix_peer_get(struct sock *s)
204  {
205  	struct sock *peer;
206  
207  	unix_state_lock(s);
208  	peer = unix_peer(s);
209  	if (peer)
210  		sock_hold(peer);
211  	unix_state_unlock(s);
212  	return peer;
213  }
214  EXPORT_SYMBOL_GPL(unix_peer_get);
215  
216  static inline void unix_release_addr(struct unix_address *addr)
217  {
218  	if (refcount_dec_and_test(&addr->refcnt))
219  		kfree(addr);
220  }
221  
222  /*
223   *	Check unix socket name:
224   *		- must not be zero length.
225   *	        - if it does not start with a zero byte, it must be NUL terminated (FS object)
226   *		- if it starts with a zero byte, it is an abstract name.
227   */
228  
229  static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230  {
231  	*hashp = 0;
232  
233  	if (len <= sizeof(short) || len > sizeof(*sunaddr))
234  		return -EINVAL;
235  	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236  		return -EINVAL;
237  	if (sunaddr->sun_path[0]) {
238  		/*
239  		 * This may look like an off-by-one error but it is a bit more
240  		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
241  		 * sun_path[108] doesn't exist as such.  However, in kernel space
242  		 * we are guaranteed that it is a valid memory location in our
243  		 * kernel address buffer.
244  		 */
245  		((char *)sunaddr)[len] = 0;
246  		len = strlen(sunaddr->sun_path)+1+sizeof(short);
247  		return len;
248  	}
249  
250  	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251  	return len;
252  }
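/* Editor's example of the two cases above: for the filesystem name
 * "/tmp/x" (with sun_family set) the path is NUL-terminated in place
 * and the call returns sizeof(short) + strlen("/tmp/x") + 1 = 9, with
 * *hashp left at 0.  For an abstract name such as "\0x"
 * (len = sizeof(short) + 2) it returns len unchanged and stores the
 * folded checksum of the whole address in *hashp.
 */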
253  
254  static void __unix_remove_socket(struct sock *sk)
255  {
256  	sk_del_node_init(sk);
257  }
258  
259  static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
260  {
261  	WARN_ON(!sk_unhashed(sk));
262  	sk_add_node(sk, list);
263  }
264  
265  static inline void unix_remove_socket(struct sock *sk)
266  {
267  	spin_lock(&unix_table_lock);
268  	__unix_remove_socket(sk);
269  	spin_unlock(&unix_table_lock);
270  }
271  
272  static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273  {
274  	spin_lock(&unix_table_lock);
275  	__unix_insert_socket(list, sk);
276  	spin_unlock(&unix_table_lock);
277  }
278  
279  static struct sock *__unix_find_socket_byname(struct net *net,
280  					      struct sockaddr_un *sunname,
281  					      int len, int type, unsigned int hash)
282  {
283  	struct sock *s;
284  
285  	sk_for_each(s, &unix_socket_table[hash ^ type]) {
286  		struct unix_sock *u = unix_sk(s);
287  
288  		if (!net_eq(sock_net(s), net))
289  			continue;
290  
291  		if (u->addr->len == len &&
292  		    !memcmp(u->addr->name, sunname, len))
293  			return s;
294  	}
295  	return NULL;
296  }
297  
298  static inline struct sock *unix_find_socket_byname(struct net *net,
299  						   struct sockaddr_un *sunname,
300  						   int len, int type,
301  						   unsigned int hash)
302  {
303  	struct sock *s;
304  
305  	spin_lock(&unix_table_lock);
306  	s = __unix_find_socket_byname(net, sunname, len, type, hash);
307  	if (s)
308  		sock_hold(s);
309  	spin_unlock(&unix_table_lock);
310  	return s;
311  }
312  
313  static struct sock *unix_find_socket_byinode(struct inode *i)
314  {
315  	struct sock *s;
316  
317  	spin_lock(&unix_table_lock);
318  	sk_for_each(s,
319  		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
320  		struct dentry *dentry = unix_sk(s)->path.dentry;
321  
322  		if (dentry && d_backing_inode(dentry) == i) {
323  			sock_hold(s);
324  			goto found;
325  		}
326  	}
327  	s = NULL;
328  found:
329  	spin_unlock(&unix_table_lock);
330  	return s;
331  }
332  
333  /* Support code for asymmetrically connected dgram sockets
334   *
335   * If a datagram socket is connected to a socket not itself connected
336   * to the first socket (e.g., /dev/log), clients may only enqueue more
337   * messages if the present receive queue of the server socket is not
338   * "too large". This means there's a second writability condition
339   * that poll and sendmsg need to test. The dgram recv code will do a
340   * wake up on the peer_wait wait queue of a socket upon reception of a
341   * datagram, which needs to be propagated to sleeping would-be writers
342   * since these might not have sent anything so far. This can't be
343   * accomplished via poll_wait because the lifetime of the server
344   * socket might be less than that of its clients if these break their
345   * association with it or if the server socket is closed while clients
346   * are still connected to it, and there's no way to inform "a polling
347   * implementation" that it should let go of a certain wait queue.
348   *
349   * In order to propagate a wake up, a wait_queue_entry_t of the client
350   * socket is enqueued on the peer_wait queue of the server socket,
351   * whose wake function does a wake_up on the ordinary client socket
352   * wait queue. This connection is established whenever a write (or
353   * poll for write) hits the flow control condition and is broken when
354   * the association to the server socket is dissolved or after a wake
355   * up was relayed.
356   */
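/* Editor's sketch of the sequence described above: client C is
 * connected to server S; C's send (or poll for write) hits the
 * flow-control limit, so unix_dgram_peer_wake_connect() hooks C's
 * peer_wake entry onto S's peer_wait queue.  When S dequeues a
 * datagram and wakes peer_wait, unix_dgram_peer_wake_relay() runs,
 * unhooks C, and wakes C's own sleep queue so the would-be writer
 * sees EPOLLOUT again.
 */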
357  
358  static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
359  				      void *key)
360  {
361  	struct unix_sock *u;
362  	wait_queue_head_t *u_sleep;
363  
364  	u = container_of(q, struct unix_sock, peer_wake);
365  
366  	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
367  			    q);
368  	u->peer_wake.private = NULL;
369  
370  	/* relaying can only happen while the wq still exists */
371  	u_sleep = sk_sleep(&u->sk);
372  	if (u_sleep)
373  		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
374  
375  	return 0;
376  }
377  
378  static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
379  {
380  	struct unix_sock *u, *u_other;
381  	int rc;
382  
383  	u = unix_sk(sk);
384  	u_other = unix_sk(other);
385  	rc = 0;
386  	spin_lock(&u_other->peer_wait.lock);
387  
388  	if (!u->peer_wake.private) {
389  		u->peer_wake.private = other;
390  		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
391  
392  		rc = 1;
393  	}
394  
395  	spin_unlock(&u_other->peer_wait.lock);
396  	return rc;
397  }
398  
399  static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400  					    struct sock *other)
401  {
402  	struct unix_sock *u, *u_other;
403  
404  	u = unix_sk(sk);
405  	u_other = unix_sk(other);
406  	spin_lock(&u_other->peer_wait.lock);
407  
408  	if (u->peer_wake.private == other) {
409  		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
410  		u->peer_wake.private = NULL;
411  	}
412  
413  	spin_unlock(&u_other->peer_wait.lock);
414  }
415  
416  static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417  						   struct sock *other)
418  {
419  	unix_dgram_peer_wake_disconnect(sk, other);
420  	wake_up_interruptible_poll(sk_sleep(sk),
421  				   EPOLLOUT |
422  				   EPOLLWRNORM |
423  				   EPOLLWRBAND);
424  }
425  
426  /* preconditions:
427   *	- unix_peer(sk) == other
428   *	- association is stable
429   */
430  static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
431  {
432  	int connected;
433  
434  	connected = unix_dgram_peer_wake_connect(sk, other);
435  
436  	/* If other is SOCK_DEAD, we want to make sure we signal
437  	 * POLLOUT, such that a subsequent write() can get a
438  	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
439  	 * to other and its queue is full, we will hang waiting for POLLOUT.
440  	 */
441  	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
442  		return 1;
443  
444  	if (connected)
445  		unix_dgram_peer_wake_disconnect(sk, other);
446  
447  	return 0;
448  }
449  
450  static int unix_writable(const struct sock *sk)
451  {
452  	return sk->sk_state != TCP_LISTEN &&
453  	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
454  }
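/* Editor's note on the shift above: a socket counts as writable while
 * sk_wmem_alloc * 4 <= sk_sndbuf, i.e. while no more than a quarter of
 * the send buffer is committed to in-flight skbs.
 */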
455  
456  static void unix_write_space(struct sock *sk)
457  {
458  	struct socket_wq *wq;
459  
460  	rcu_read_lock();
461  	if (unix_writable(sk)) {
462  		wq = rcu_dereference(sk->sk_wq);
463  		if (skwq_has_sleeper(wq))
464  			wake_up_interruptible_sync_poll(&wq->wait,
465  				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
466  		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
467  	}
468  	rcu_read_unlock();
469  }
470  
471  /* When a dgram socket disconnects (or changes its peer), we clear its
472   * receive queue of packets that arrived from the previous peer. First,
473   * this allows flow control based only on wmem_alloc; second, an sk
474   * connected to a peer may receive messages only from that peer. */
475  static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
476  {
477  	if (!skb_queue_empty(&sk->sk_receive_queue)) {
478  		skb_queue_purge(&sk->sk_receive_queue);
479  		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
480  
481  		/* If one link of a bidirectional dgram pipe is disconnected,
482  		 * we signal an error. Messages are lost. Do not do this
483  		 * when the peer was not connected to us.
484  		 */
485  		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
486  			other->sk_err = ECONNRESET;
487  			other->sk_error_report(other);
488  		}
489  	}
490  }
491  
492  static void unix_sock_destructor(struct sock *sk)
493  {
494  	struct unix_sock *u = unix_sk(sk);
495  
496  	skb_queue_purge(&sk->sk_receive_queue);
497  
498  	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
499  	WARN_ON(!sk_unhashed(sk));
500  	WARN_ON(sk->sk_socket);
501  	if (!sock_flag(sk, SOCK_DEAD)) {
502  		pr_info("Attempt to release alive unix socket: %p\n", sk);
503  		return;
504  	}
505  
506  	if (u->addr)
507  		unix_release_addr(u->addr);
508  
509  	atomic_long_dec(&unix_nr_socks);
510  	local_bh_disable();
511  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
512  	local_bh_enable();
513  #ifdef UNIX_REFCNT_DEBUG
514  	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
515  		atomic_long_read(&unix_nr_socks));
516  #endif
517  }
518  
519  static void unix_release_sock(struct sock *sk, int embrion)
520  {
521  	struct unix_sock *u = unix_sk(sk);
522  	struct path path;
523  	struct sock *skpair;
524  	struct sk_buff *skb;
525  	int state;
526  
527  	unix_remove_socket(sk);
528  
529  	/* Clear state */
530  	unix_state_lock(sk);
531  	sock_orphan(sk);
532  	sk->sk_shutdown = SHUTDOWN_MASK;
533  	path	     = u->path;
534  	u->path.dentry = NULL;
535  	u->path.mnt = NULL;
536  	state = sk->sk_state;
537  	sk->sk_state = TCP_CLOSE;
538  	unix_state_unlock(sk);
539  
540  	wake_up_interruptible_all(&u->peer_wait);
541  
542  	skpair = unix_peer(sk);
543  
544  	if (skpair != NULL) {
545  		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
546  			unix_state_lock(skpair);
547  			/* No more writes */
548  			skpair->sk_shutdown = SHUTDOWN_MASK;
549  			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
550  				skpair->sk_err = ECONNRESET;
551  			unix_state_unlock(skpair);
552  			skpair->sk_state_change(skpair);
553  			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
554  		}
555  
556  		unix_dgram_peer_wake_disconnect(sk, skpair);
557  		sock_put(skpair); /* It may now die */
558  		unix_peer(sk) = NULL;
559  	}
560  
561  	/* Try to flush out this socket. Throw out buffers at least */
562  
563  	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
564  		if (state == TCP_LISTEN)
565  			unix_release_sock(skb->sk, 1);
566  		/* passed fds are erased in the kfree_skb hook	      */
567  		UNIXCB(skb).consumed = skb->len;
568  		kfree_skb(skb);
569  	}
570  
571  	if (path.dentry)
572  		path_put(&path);
573  
574  	sock_put(sk);
575  
576  	/* ---- Socket is dead now and most probably destroyed ---- */
577  
578  	/*
579  	 * Fixme: BSD difference: In BSD all sockets connected to us get
580  	 *	  ECONNRESET and we die on the spot. In Linux we behave
581  	 *	  like files and pipes do and wait for the last
582  	 *	  dereference.
583  	 *
584  	 * Can't we simply set sock->err?
585  	 *
586  	 *	  What does the above comment talk about? --ANK(980817)
587  	 */
588  
589  	if (unix_tot_inflight)
590  		unix_gc();		/* Garbage collect fds */
591  }
592  
593  static void init_peercred(struct sock *sk)
594  {
595  	put_pid(sk->sk_peer_pid);
596  	if (sk->sk_peer_cred)
597  		put_cred(sk->sk_peer_cred);
598  	sk->sk_peer_pid  = get_pid(task_tgid(current));
599  	sk->sk_peer_cred = get_current_cred();
600  }
601  
602  static void copy_peercred(struct sock *sk, struct sock *peersk)
603  {
604  	put_pid(sk->sk_peer_pid);
605  	if (sk->sk_peer_cred)
606  		put_cred(sk->sk_peer_cred);
607  	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
608  	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
609  }
610  
611  static int unix_listen(struct socket *sock, int backlog)
612  {
613  	int err;
614  	struct sock *sk = sock->sk;
615  	struct unix_sock *u = unix_sk(sk);
616  	struct pid *old_pid = NULL;
617  
618  	err = -EOPNOTSUPP;
619  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
620  		goto out;	/* Only stream/seqpacket sockets accept */
621  	err = -EINVAL;
622  	if (!u->addr)
623  		goto out;	/* No listens on an unbound socket */
624  	unix_state_lock(sk);
625  	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
626  		goto out_unlock;
627  	if (backlog > sk->sk_max_ack_backlog)
628  		wake_up_interruptible_all(&u->peer_wait);
629  	sk->sk_max_ack_backlog	= backlog;
630  	sk->sk_state		= TCP_LISTEN;
631  	/* set credentials so connect can copy them */
632  	init_peercred(sk);
633  	err = 0;
634  
635  out_unlock:
636  	unix_state_unlock(sk);
637  	put_pid(old_pid);
638  out:
639  	return err;
640  }
641  
642  static int unix_release(struct socket *);
643  static int unix_bind(struct socket *, struct sockaddr *, int);
644  static int unix_stream_connect(struct socket *, struct sockaddr *,
645  			       int addr_len, int flags);
646  static int unix_socketpair(struct socket *, struct socket *);
647  static int unix_accept(struct socket *, struct socket *, int, bool);
648  static int unix_getname(struct socket *, struct sockaddr *, int);
649  static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
650  static __poll_t unix_dgram_poll(struct file *, struct socket *,
651  				    poll_table *);
652  static int unix_ioctl(struct socket *, unsigned int, unsigned long);
653  #ifdef CONFIG_COMPAT
654  static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
655  #endif
656  static int unix_shutdown(struct socket *, int);
657  static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
658  static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
659  static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
660  				    size_t size, int flags);
661  static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
662  				       struct pipe_inode_info *, size_t size,
663  				       unsigned int flags);
664  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
665  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
666  static int unix_dgram_connect(struct socket *, struct sockaddr *,
667  			      int, int);
668  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
669  static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
670  				  int);
671  
672  static int unix_set_peek_off(struct sock *sk, int val)
673  {
674  	struct unix_sock *u = unix_sk(sk);
675  
676  	if (mutex_lock_interruptible(&u->iolock))
677  		return -EINTR;
678  
679  	sk->sk_peek_off = val;
680  	mutex_unlock(&u->iolock);
681  
682  	return 0;
683  }
684  
685  static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
686  {
687  	struct sock *sk = sock->sk;
688  	struct unix_sock *u;
689  
690  	if (sk) {
691  		u = unix_sk(sock->sk);
692  		seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
693  	}
694  }
695  
696  static const struct proto_ops unix_stream_ops = {
697  	.family =	PF_UNIX,
698  	.owner =	THIS_MODULE,
699  	.release =	unix_release,
700  	.bind =		unix_bind,
701  	.connect =	unix_stream_connect,
702  	.socketpair =	unix_socketpair,
703  	.accept =	unix_accept,
704  	.getname =	unix_getname,
705  	.poll =		unix_poll,
706  	.ioctl =	unix_ioctl,
707  #ifdef CONFIG_COMPAT
708  	.compat_ioctl =	unix_compat_ioctl,
709  #endif
710  	.listen =	unix_listen,
711  	.shutdown =	unix_shutdown,
712  	.setsockopt =	sock_no_setsockopt,
713  	.getsockopt =	sock_no_getsockopt,
714  	.sendmsg =	unix_stream_sendmsg,
715  	.recvmsg =	unix_stream_recvmsg,
716  	.mmap =		sock_no_mmap,
717  	.sendpage =	unix_stream_sendpage,
718  	.splice_read =	unix_stream_splice_read,
719  	.set_peek_off =	unix_set_peek_off,
720  	.show_fdinfo =	unix_show_fdinfo,
721  };
722  
723  static const struct proto_ops unix_dgram_ops = {
724  	.family =	PF_UNIX,
725  	.owner =	THIS_MODULE,
726  	.release =	unix_release,
727  	.bind =		unix_bind,
728  	.connect =	unix_dgram_connect,
729  	.socketpair =	unix_socketpair,
730  	.accept =	sock_no_accept,
731  	.getname =	unix_getname,
732  	.poll =		unix_dgram_poll,
733  	.ioctl =	unix_ioctl,
734  #ifdef CONFIG_COMPAT
735  	.compat_ioctl =	unix_compat_ioctl,
736  #endif
737  	.listen =	sock_no_listen,
738  	.shutdown =	unix_shutdown,
739  	.setsockopt =	sock_no_setsockopt,
740  	.getsockopt =	sock_no_getsockopt,
741  	.sendmsg =	unix_dgram_sendmsg,
742  	.recvmsg =	unix_dgram_recvmsg,
743  	.mmap =		sock_no_mmap,
744  	.sendpage =	sock_no_sendpage,
745  	.set_peek_off =	unix_set_peek_off,
746  	.show_fdinfo =	unix_show_fdinfo,
747  };
748  
749  static const struct proto_ops unix_seqpacket_ops = {
750  	.family =	PF_UNIX,
751  	.owner =	THIS_MODULE,
752  	.release =	unix_release,
753  	.bind =		unix_bind,
754  	.connect =	unix_stream_connect,
755  	.socketpair =	unix_socketpair,
756  	.accept =	unix_accept,
757  	.getname =	unix_getname,
758  	.poll =		unix_dgram_poll,
759  	.ioctl =	unix_ioctl,
760  #ifdef CONFIG_COMPAT
761  	.compat_ioctl =	unix_compat_ioctl,
762  #endif
763  	.listen =	unix_listen,
764  	.shutdown =	unix_shutdown,
765  	.setsockopt =	sock_no_setsockopt,
766  	.getsockopt =	sock_no_getsockopt,
767  	.sendmsg =	unix_seqpacket_sendmsg,
768  	.recvmsg =	unix_seqpacket_recvmsg,
769  	.mmap =		sock_no_mmap,
770  	.sendpage =	sock_no_sendpage,
771  	.set_peek_off =	unix_set_peek_off,
772  	.show_fdinfo =	unix_show_fdinfo,
773  };
774  
775  static struct proto unix_proto = {
776  	.name			= "UNIX",
777  	.owner			= THIS_MODULE,
778  	.obj_size		= sizeof(struct unix_sock),
779  };
780  
781  static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
782  {
783  	struct sock *sk = NULL;
784  	struct unix_sock *u;
785  
786  	atomic_long_inc(&unix_nr_socks);
787  	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
788  		goto out;
789  
790  	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
791  	if (!sk)
792  		goto out;
793  
794  	sock_init_data(sock, sk);
795  
796  	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
797  	sk->sk_write_space	= unix_write_space;
798  	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
799  	sk->sk_destruct		= unix_sock_destructor;
800  	u	  = unix_sk(sk);
801  	u->path.dentry = NULL;
802  	u->path.mnt = NULL;
803  	spin_lock_init(&u->lock);
804  	atomic_long_set(&u->inflight, 0);
805  	INIT_LIST_HEAD(&u->link);
806  	mutex_init(&u->iolock); /* single task reading lock */
807  	mutex_init(&u->bindlock); /* single task binding lock */
808  	init_waitqueue_head(&u->peer_wait);
809  	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
810  	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
811  	unix_insert_socket(unix_sockets_unbound(sk), sk);
812  out:
813  	if (sk == NULL)
814  		atomic_long_dec(&unix_nr_socks);
815  	else {
816  		local_bh_disable();
817  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
818  		local_bh_enable();
819  	}
820  	return sk;
821  }
822  
823  static int unix_create(struct net *net, struct socket *sock, int protocol,
824  		       int kern)
825  {
826  	if (protocol && protocol != PF_UNIX)
827  		return -EPROTONOSUPPORT;
828  
829  	sock->state = SS_UNCONNECTED;
830  
831  	switch (sock->type) {
832  	case SOCK_STREAM:
833  		sock->ops = &unix_stream_ops;
834  		break;
835  		/*
836  		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
837  		 *	nothing uses it.
838  		 */
839  	case SOCK_RAW:
840  		sock->type = SOCK_DGRAM;
841  		/* fall through */
842  	case SOCK_DGRAM:
843  		sock->ops = &unix_dgram_ops;
844  		break;
845  	case SOCK_SEQPACKET:
846  		sock->ops = &unix_seqpacket_ops;
847  		break;
848  	default:
849  		return -ESOCKTNOSUPPORT;
850  	}
851  
852  	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
853  }
854  
855  static int unix_release(struct socket *sock)
856  {
857  	struct sock *sk = sock->sk;
858  
859  	if (!sk)
860  		return 0;
861  
862  	unix_release_sock(sk, 0);
863  	sock->sk = NULL;
864  
865  	return 0;
866  }
867  
868  static int unix_autobind(struct socket *sock)
869  {
870  	struct sock *sk = sock->sk;
871  	struct net *net = sock_net(sk);
872  	struct unix_sock *u = unix_sk(sk);
873  	static u32 ordernum = 1;
874  	struct unix_address *addr;
875  	int err;
876  	unsigned int retries = 0;
877  
878  	err = mutex_lock_interruptible(&u->bindlock);
879  	if (err)
880  		return err;
881  
882  	err = 0;
883  	if (u->addr)
884  		goto out;
885  
886  	err = -ENOMEM;
887  	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
888  	if (!addr)
889  		goto out;
890  
891  	addr->name->sun_family = AF_UNIX;
892  	refcount_set(&addr->refcnt, 1);
893  
894  retry:
895  	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
896  	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
897  
898  	spin_lock(&unix_table_lock);
899  	ordernum = (ordernum+1)&0xFFFFF;
900  
901  	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
902  				      addr->hash)) {
903  		spin_unlock(&unix_table_lock);
904  		/*
905  		 * __unix_find_socket_byname() may take a long time if many names
906  		 * are already in use.
907  		 */
908  		cond_resched();
909  		/* Give up if all names seem to be in use. */
910  		if (retries++ == 0xFFFFF) {
911  			err = -ENOSPC;
912  			kfree(addr);
913  			goto out;
914  		}
915  		goto retry;
916  	}
917  	addr->hash ^= sk->sk_type;
918  
919  	__unix_remove_socket(sk);
920  	smp_store_release(&u->addr, addr);
921  	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
922  	spin_unlock(&unix_table_lock);
923  	err = 0;
924  
925  out:	mutex_unlock(&u->bindlock);
926  	return err;
927  }
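/* Editor's note: an autobound socket gets an abstract name consisting
 * of a leading 0 byte plus five hex digits, e.g. "\0" "00001" for
 * ordernum 1, so addr->len = sizeof(short) + 1 + 5 = 8.  With only
 * 0x100000 candidate names, -ENOSPC is returned once the 0xFFFFF
 * retries are exhausted.
 */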
928  
929  static struct sock *unix_find_other(struct net *net,
930  				    struct sockaddr_un *sunname, int len,
931  				    int type, unsigned int hash, int *error)
932  {
933  	struct sock *u;
934  	struct path path;
935  	int err = 0;
936  
937  	if (sunname->sun_path[0]) {
938  		struct inode *inode;
939  		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
940  		if (err)
941  			goto fail;
942  		inode = d_backing_inode(path.dentry);
943  		err = inode_permission(inode, MAY_WRITE);
944  		if (err)
945  			goto put_fail;
946  
947  		err = -ECONNREFUSED;
948  		if (!S_ISSOCK(inode->i_mode))
949  			goto put_fail;
950  		u = unix_find_socket_byinode(inode);
951  		if (!u)
952  			goto put_fail;
953  
954  		if (u->sk_type == type)
955  			touch_atime(&path);
956  
957  		path_put(&path);
958  
959  		err = -EPROTOTYPE;
960  		if (u->sk_type != type) {
961  			sock_put(u);
962  			goto fail;
963  		}
964  	} else {
965  		err = -ECONNREFUSED;
966  		u = unix_find_socket_byname(net, sunname, len, type, hash);
967  		if (u) {
968  			struct dentry *dentry;
969  			dentry = unix_sk(u)->path.dentry;
970  			if (dentry)
971  				touch_atime(&unix_sk(u)->path);
972  		} else
973  			goto fail;
974  	}
975  	return u;
976  
977  put_fail:
978  	path_put(&path);
979  fail:
980  	*error = err;
981  	return NULL;
982  }
983  
984  static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
985  {
986  	struct dentry *dentry;
987  	struct path path;
988  	int err = 0;
989  	/*
990  	 * Get the parent directory, calculate the hash for last
991  	 * component.
992  	 */
993  	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
994  	err = PTR_ERR(dentry);
995  	if (IS_ERR(dentry))
996  		return err;
997  
998  	/*
999  	 * All right, let's create it.
1000  	 */
1001  	err = security_path_mknod(&path, dentry, mode, 0);
1002  	if (!err) {
1003  		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1004  		if (!err) {
1005  			res->mnt = mntget(path.mnt);
1006  			res->dentry = dget(dentry);
1007  		}
1008  	}
1009  	done_path_create(&path, dentry);
1010  	return err;
1011  }
1012  
1013  static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1014  {
1015  	struct sock *sk = sock->sk;
1016  	struct net *net = sock_net(sk);
1017  	struct unix_sock *u = unix_sk(sk);
1018  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1019  	char *sun_path = sunaddr->sun_path;
1020  	int err;
1021  	unsigned int hash;
1022  	struct unix_address *addr;
1023  	struct hlist_head *list;
1024  	struct path path = { };
1025  
1026  	err = -EINVAL;
1027  	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1028  	    sunaddr->sun_family != AF_UNIX)
1029  		goto out;
1030  
1031  	if (addr_len == sizeof(short)) {
1032  		err = unix_autobind(sock);
1033  		goto out;
1034  	}
1035  
1036  	err = unix_mkname(sunaddr, addr_len, &hash);
1037  	if (err < 0)
1038  		goto out;
1039  	addr_len = err;
1040  
1041  	if (sun_path[0]) {
1042  		umode_t mode = S_IFSOCK |
1043  		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1044  		err = unix_mknod(sun_path, mode, &path);
1045  		if (err) {
1046  			if (err == -EEXIST)
1047  				err = -EADDRINUSE;
1048  			goto out;
1049  		}
1050  	}
1051  
1052  	err = mutex_lock_interruptible(&u->bindlock);
1053  	if (err)
1054  		goto out_put;
1055  
1056  	err = -EINVAL;
1057  	if (u->addr)
1058  		goto out_up;
1059  
1060  	err = -ENOMEM;
1061  	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1062  	if (!addr)
1063  		goto out_up;
1064  
1065  	memcpy(addr->name, sunaddr, addr_len);
1066  	addr->len = addr_len;
1067  	addr->hash = hash ^ sk->sk_type;
1068  	refcount_set(&addr->refcnt, 1);
1069  
1070  	if (sun_path[0]) {
1071  		addr->hash = UNIX_HASH_SIZE;
1072  		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1073  		spin_lock(&unix_table_lock);
1074  		u->path = path;
1075  		list = &unix_socket_table[hash];
1076  	} else {
1077  		spin_lock(&unix_table_lock);
1078  		err = -EADDRINUSE;
1079  		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1080  					      sk->sk_type, hash)) {
1081  			unix_release_addr(addr);
1082  			goto out_unlock;
1083  		}
1084  
1085  		list = &unix_socket_table[addr->hash];
1086  	}
1087  
1088  	err = 0;
1089  	__unix_remove_socket(sk);
1090  	smp_store_release(&u->addr, addr);
1091  	__unix_insert_socket(list, sk);
1092  
1093  out_unlock:
1094  	spin_unlock(&unix_table_lock);
1095  out_up:
1096  	mutex_unlock(&u->bindlock);
1097  out_put:
1098  	if (err)
1099  		path_put(&path);
1100  out:
1101  	return err;
1102  }
1103  
1104  static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1105  {
1106  	if (unlikely(sk1 == sk2) || !sk2) {
1107  		unix_state_lock(sk1);
1108  		return;
1109  	}
1110  	if (sk1 < sk2) {
1111  		unix_state_lock(sk1);
1112  		unix_state_lock_nested(sk2);
1113  	} else {
1114  		unix_state_lock(sk2);
1115  		unix_state_lock_nested(sk1);
1116  	}
1117  }
1118  
1119  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1120  {
1121  	if (unlikely(sk1 == sk2) || !sk2) {
1122  		unix_state_unlock(sk1);
1123  		return;
1124  	}
1125  	unix_state_unlock(sk1);
1126  	unix_state_unlock(sk2);
1127  }
1128  
1129  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1130  			      int alen, int flags)
1131  {
1132  	struct sock *sk = sock->sk;
1133  	struct net *net = sock_net(sk);
1134  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1135  	struct sock *other;
1136  	unsigned int hash;
1137  	int err;
1138  
1139  	err = -EINVAL;
1140  	if (alen < offsetofend(struct sockaddr, sa_family))
1141  		goto out;
1142  
1143  	if (addr->sa_family != AF_UNSPEC) {
1144  		err = unix_mkname(sunaddr, alen, &hash);
1145  		if (err < 0)
1146  			goto out;
1147  		alen = err;
1148  
1149  		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1150  		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1151  			goto out;
1152  
1153  restart:
1154  		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1155  		if (!other)
1156  			goto out;
1157  
1158  		unix_state_double_lock(sk, other);
1159  
1160  		/* Apparently VFS overslept socket death. Retry. */
1161  		if (sock_flag(other, SOCK_DEAD)) {
1162  			unix_state_double_unlock(sk, other);
1163  			sock_put(other);
1164  			goto restart;
1165  		}
1166  
1167  		err = -EPERM;
1168  		if (!unix_may_send(sk, other))
1169  			goto out_unlock;
1170  
1171  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1172  		if (err)
1173  			goto out_unlock;
1174  
1175  	} else {
1176  		/*
1177  		 *	1003.1g breaking connected state with AF_UNSPEC
1178  		 */
1179  		other = NULL;
1180  		unix_state_double_lock(sk, other);
1181  	}
1182  
1183  	/*
1184  	 * If it was connected, reconnect.
1185  	 */
1186  	if (unix_peer(sk)) {
1187  		struct sock *old_peer = unix_peer(sk);
1188  		unix_peer(sk) = other;
1189  		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1190  
1191  		unix_state_double_unlock(sk, other);
1192  
1193  		if (other != old_peer)
1194  			unix_dgram_disconnected(sk, old_peer);
1195  		sock_put(old_peer);
1196  	} else {
1197  		unix_peer(sk) = other;
1198  		unix_state_double_unlock(sk, other);
1199  	}
1200  	return 0;
1201  
1202  out_unlock:
1203  	unix_state_double_unlock(sk, other);
1204  	sock_put(other);
1205  out:
1206  	return err;
1207  }
1208  
1209  static long unix_wait_for_peer(struct sock *other, long timeo)
1210  {
1211  	struct unix_sock *u = unix_sk(other);
1212  	int sched;
1213  	DEFINE_WAIT(wait);
1214  
1215  	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1216  
1217  	sched = !sock_flag(other, SOCK_DEAD) &&
1218  		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1219  		unix_recvq_full(other);
1220  
1221  	unix_state_unlock(other);
1222  
1223  	if (sched)
1224  		timeo = schedule_timeout(timeo);
1225  
1226  	finish_wait(&u->peer_wait, &wait);
1227  	return timeo;
1228  }
1229  
1230  static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1231  			       int addr_len, int flags)
1232  {
1233  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1234  	struct sock *sk = sock->sk;
1235  	struct net *net = sock_net(sk);
1236  	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1237  	struct sock *newsk = NULL;
1238  	struct sock *other = NULL;
1239  	struct sk_buff *skb = NULL;
1240  	unsigned int hash;
1241  	int st;
1242  	int err;
1243  	long timeo;
1244  
1245  	err = unix_mkname(sunaddr, addr_len, &hash);
1246  	if (err < 0)
1247  		goto out;
1248  	addr_len = err;
1249  
1250  	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1251  	    (err = unix_autobind(sock)) != 0)
1252  		goto out;
1253  
1254  	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1255  
1256  	/* First of all allocate resources.
1257	   If we do it after the state is locked,
1258	   we will have to recheck everything again in any case.
1259  	 */
1260  
1261  	err = -ENOMEM;
1262  
1263  	/* create new sock for complete connection */
1264  	newsk = unix_create1(sock_net(sk), NULL, 0);
1265  	if (newsk == NULL)
1266  		goto out;
1267  
1268  	/* Allocate skb for sending to listening sock */
1269  	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1270  	if (skb == NULL)
1271  		goto out;
1272  
1273  restart:
1274  	/*  Find listening sock. */
1275  	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1276  	if (!other)
1277  		goto out;
1278  
1279  	/* Latch state of peer */
1280  	unix_state_lock(other);
1281  
1282  	/* Apparently VFS overslept socket death. Retry. */
1283  	if (sock_flag(other, SOCK_DEAD)) {
1284  		unix_state_unlock(other);
1285  		sock_put(other);
1286  		goto restart;
1287  	}
1288  
1289  	err = -ECONNREFUSED;
1290  	if (other->sk_state != TCP_LISTEN)
1291  		goto out_unlock;
1292  	if (other->sk_shutdown & RCV_SHUTDOWN)
1293  		goto out_unlock;
1294  
1295  	if (unix_recvq_full(other)) {
1296  		err = -EAGAIN;
1297  		if (!timeo)
1298  			goto out_unlock;
1299  
1300  		timeo = unix_wait_for_peer(other, timeo);
1301  
1302  		err = sock_intr_errno(timeo);
1303  		if (signal_pending(current))
1304  			goto out;
1305  		sock_put(other);
1306  		goto restart;
1307  	}
1308  
1309  	/* Latch our state.
1310  
1311	   This is a tricky place. We need to grab our state lock and cannot
1312	   drop the lock on the peer. It is dangerous because a deadlock is
1313	   possible. The connect-to-self case and simultaneous
1314	   attempts to connect are eliminated by checking the socket
1315	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1316	   check this before attempting to grab the lock.
1317
1318	   Well, and we have to recheck the state after the socket is locked.
1319  	 */
1320  	st = sk->sk_state;
1321  
1322  	switch (st) {
1323  	case TCP_CLOSE:
1324  		/* This is ok... continue with connect */
1325  		break;
1326  	case TCP_ESTABLISHED:
1327  		/* Socket is already connected */
1328  		err = -EISCONN;
1329  		goto out_unlock;
1330  	default:
1331  		err = -EINVAL;
1332  		goto out_unlock;
1333  	}
1334  
1335  	unix_state_lock_nested(sk);
1336  
1337  	if (sk->sk_state != st) {
1338  		unix_state_unlock(sk);
1339  		unix_state_unlock(other);
1340  		sock_put(other);
1341  		goto restart;
1342  	}
1343  
1344  	err = security_unix_stream_connect(sk, other, newsk);
1345  	if (err) {
1346  		unix_state_unlock(sk);
1347  		goto out_unlock;
1348  	}
1349  
1350  	/* The way is open! Quickly set all the necessary fields... */
1351  
1352  	sock_hold(sk);
1353  	unix_peer(newsk)	= sk;
1354  	newsk->sk_state		= TCP_ESTABLISHED;
1355  	newsk->sk_type		= sk->sk_type;
1356  	init_peercred(newsk);
1357  	newu = unix_sk(newsk);
1358  	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1359  	otheru = unix_sk(other);
1360  
1361  	/* copy address information from listening to new sock
1362  	 *
1363  	 * The contents of *(otheru->addr) and otheru->path
1364  	 * are seen fully set up here, since we have found
1365  	 * otheru in hash under unix_table_lock.  Insertion
1366  	 * into the hash chain we'd found it in had been done
1367  	 * in an earlier critical area protected by unix_table_lock,
1368  	 * the same one where we'd set *(otheru->addr) contents,
1369  	 * as well as otheru->path and otheru->addr itself.
1370  	 *
1371  	 * Using smp_store_release() here to set newu->addr
1372  	 * is enough to make those stores, as well as stores
1373  	 * to newu->path visible to anyone who gets newu->addr
1374  	 * by smp_load_acquire().  IOW, the same warranties
1375  	 * as for unix_sock instances bound in unix_bind() or
1376  	 * in unix_autobind().
1377  	 */
1378  	if (otheru->path.dentry) {
1379  		path_get(&otheru->path);
1380  		newu->path = otheru->path;
1381  	}
1382  	refcount_inc(&otheru->addr->refcnt);
1383  	smp_store_release(&newu->addr, otheru->addr);
1384  
1385  	/* Set credentials */
1386  	copy_peercred(sk, other);
1387  
1388  	sock->state	= SS_CONNECTED;
1389  	sk->sk_state	= TCP_ESTABLISHED;
1390  	sock_hold(newsk);
1391  
1392  	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1393  	unix_peer(sk)	= newsk;
1394  
1395  	unix_state_unlock(sk);
1396  
1397  	/* take ten and send info to the listening sock */
1398  	spin_lock(&other->sk_receive_queue.lock);
1399  	__skb_queue_tail(&other->sk_receive_queue, skb);
1400  	spin_unlock(&other->sk_receive_queue.lock);
1401  	unix_state_unlock(other);
1402  	other->sk_data_ready(other);
1403  	sock_put(other);
1404  	return 0;
1405  
1406  out_unlock:
1407  	if (other)
1408  		unix_state_unlock(other);
1409  
1410  out:
1411  	kfree_skb(skb);
1412  	if (newsk)
1413  		unix_release_sock(newsk, 0);
1414  	if (other)
1415  		sock_put(other);
1416  	return err;
1417  }
1418  
1419  static int unix_socketpair(struct socket *socka, struct socket *sockb)
1420  {
1421  	struct sock *ska = socka->sk, *skb = sockb->sk;
1422  
1423  	/* Join our sockets back to back */
1424  	sock_hold(ska);
1425  	sock_hold(skb);
1426  	unix_peer(ska) = skb;
1427  	unix_peer(skb) = ska;
1428  	init_peercred(ska);
1429  	init_peercred(skb);
1430  
1431  	if (ska->sk_type != SOCK_DGRAM) {
1432  		ska->sk_state = TCP_ESTABLISHED;
1433  		skb->sk_state = TCP_ESTABLISHED;
1434  		socka->state  = SS_CONNECTED;
1435  		sockb->state  = SS_CONNECTED;
1436  	}
1437  	return 0;
1438  }
1439  
1440  static void unix_sock_inherit_flags(const struct socket *old,
1441  				    struct socket *new)
1442  {
1443  	if (test_bit(SOCK_PASSCRED, &old->flags))
1444  		set_bit(SOCK_PASSCRED, &new->flags);
1445  	if (test_bit(SOCK_PASSSEC, &old->flags))
1446  		set_bit(SOCK_PASSSEC, &new->flags);
1447  }
1448  
1449  static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1450  		       bool kern)
1451  {
1452  	struct sock *sk = sock->sk;
1453  	struct sock *tsk;
1454  	struct sk_buff *skb;
1455  	int err;
1456  
1457  	err = -EOPNOTSUPP;
1458  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1459  		goto out;
1460  
1461  	err = -EINVAL;
1462  	if (sk->sk_state != TCP_LISTEN)
1463  		goto out;
1464  
1465  	/* If socket state is TCP_LISTEN it cannot change (for now...),
1466  	 * so no locks are necessary.
1467  	 */
1468  
1469  	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1470  	if (!skb) {
1471  		/* This means receive shutdown. */
1472  		if (err == 0)
1473  			err = -EINVAL;
1474  		goto out;
1475  	}
1476  
1477  	tsk = skb->sk;
1478  	skb_free_datagram(sk, skb);
1479  	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1480  
1481  	/* attach accepted sock to socket */
1482  	unix_state_lock(tsk);
1483  	newsock->state = SS_CONNECTED;
1484  	unix_sock_inherit_flags(sock, newsock);
1485  	sock_graft(tsk, newsock);
1486  	unix_state_unlock(tsk);
1487  	return 0;
1488  
1489  out:
1490  	return err;
1491  }
1492  
1493  
1494  static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1495  {
1496  	struct sock *sk = sock->sk;
1497  	struct unix_address *addr;
1498  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1499  	int err = 0;
1500  
1501  	if (peer) {
1502  		sk = unix_peer_get(sk);
1503  
1504  		err = -ENOTCONN;
1505  		if (!sk)
1506  			goto out;
1507  		err = 0;
1508  	} else {
1509  		sock_hold(sk);
1510  	}
1511  
1512  	addr = smp_load_acquire(&unix_sk(sk)->addr);
1513  	if (!addr) {
1514  		sunaddr->sun_family = AF_UNIX;
1515  		sunaddr->sun_path[0] = 0;
1516  		err = sizeof(short);
1517  	} else {
1518  		err = addr->len;
1519  		memcpy(sunaddr, addr->name, addr->len);
1520  	}
1521  	sock_put(sk);
1522  out:
1523  	return err;
1524  }
1525  
1526  static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1527  {
1528  	int err = 0;
1529  
1530  	UNIXCB(skb).pid  = get_pid(scm->pid);
1531  	UNIXCB(skb).uid = scm->creds.uid;
1532  	UNIXCB(skb).gid = scm->creds.gid;
1533  	UNIXCB(skb).fp = NULL;
1534  	unix_get_secdata(scm, skb);
1535  	if (scm->fp && send_fds)
1536  		err = unix_attach_fds(scm, skb);
1537  
1538  	skb->destructor = unix_destruct_scm;
1539  	return err;
1540  }
1541  
1542  static bool unix_passcred_enabled(const struct socket *sock,
1543  				  const struct sock *other)
1544  {
1545  	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1546  	       !other->sk_socket ||
1547  	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1548  }
1549  
1550  /*
1551   * Some apps rely on write() giving SCM_CREDENTIALS.
1552   * We include credentials if the source or destination socket
1553   * asserted SOCK_PASSCRED.
1554   */
1555  static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1556  			    const struct sock *other)
1557  {
1558  	if (UNIXCB(skb).pid)
1559  		return;
1560  	if (unix_passcred_enabled(sock, other)) {
1561  		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1562  		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1563  	}
1564  }
1565  
1566  static int maybe_init_creds(struct scm_cookie *scm,
1567  			    struct socket *socket,
1568  			    const struct sock *other)
1569  {
1570  	int err;
1571  	struct msghdr msg = { .msg_controllen = 0 };
1572  
1573  	err = scm_send(socket, &msg, scm, false);
1574  	if (err)
1575  		return err;
1576  
1577  	if (unix_passcred_enabled(socket, other)) {
1578  		scm->pid = get_pid(task_tgid(current));
1579  		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1580  	}
1581  	return err;
1582  }
1583  
1584  static bool unix_skb_scm_eq(struct sk_buff *skb,
1585  			    struct scm_cookie *scm)
1586  {
1587  	const struct unix_skb_parms *u = &UNIXCB(skb);
1588  
1589  	return u->pid == scm->pid &&
1590  	       uid_eq(u->uid, scm->creds.uid) &&
1591  	       gid_eq(u->gid, scm->creds.gid) &&
1592  	       unix_secdata_eq(scm, skb);
1593  }
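/* Editor's note: unix_skb_scm_eq() is used below, e.g. by
 * unix_stream_sendpage(), to decide whether new data may be merged
 * into the skb at the tail of the receiver's queue; coalescing is
 * only safe when the attached pid, uid/gid and security data all
 * match.
 */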
1594  
1595  static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1596  {
1597  	struct scm_fp_list *fp = UNIXCB(skb).fp;
1598  	struct unix_sock *u = unix_sk(sk);
1599  
1600  	lockdep_assert_held(&sk->sk_receive_queue.lock);
1601  
1602  	if (unlikely(fp && fp->count))
1603  		u->scm_stat.nr_fds += fp->count;
1604  }
1605  
1606  static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1607  {
1608  	struct scm_fp_list *fp = UNIXCB(skb).fp;
1609  	struct unix_sock *u = unix_sk(sk);
1610  
1611  	lockdep_assert_held(&sk->sk_receive_queue.lock);
1612  
1613  	if (unlikely(fp && fp->count))
1614  		u->scm_stat.nr_fds -= fp->count;
1615  }
1616  
1617  /*
1618   *	Send AF_UNIX data.
1619   */
1620  
1621  static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1622  			      size_t len)
1623  {
1624  	struct sock *sk = sock->sk;
1625  	struct net *net = sock_net(sk);
1626  	struct unix_sock *u = unix_sk(sk);
1627  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1628  	struct sock *other = NULL;
1629  	int namelen = 0; /* fake GCC */
1630  	int err;
1631  	unsigned int hash;
1632  	struct sk_buff *skb;
1633  	long timeo;
1634  	struct scm_cookie scm;
1635  	int data_len = 0;
1636  	int sk_locked;
1637  
1638  	wait_for_unix_gc();
1639  	err = scm_send(sock, msg, &scm, false);
1640  	if (err < 0)
1641  		return err;
1642  
1643  	err = -EOPNOTSUPP;
1644  	if (msg->msg_flags&MSG_OOB)
1645  		goto out;
1646  
1647  	if (msg->msg_namelen) {
1648  		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1649  		if (err < 0)
1650  			goto out;
1651  		namelen = err;
1652  	} else {
1653  		sunaddr = NULL;
1654  		err = -ENOTCONN;
1655  		other = unix_peer_get(sk);
1656  		if (!other)
1657  			goto out;
1658  	}
1659  
1660  	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1661  	    && (err = unix_autobind(sock)) != 0)
1662  		goto out;
1663  
1664  	err = -EMSGSIZE;
1665  	if (len > sk->sk_sndbuf - 32)
1666  		goto out;
1667  
1668  	if (len > SKB_MAX_ALLOC) {
1669  		data_len = min_t(size_t,
1670  				 len - SKB_MAX_ALLOC,
1671  				 MAX_SKB_FRAGS * PAGE_SIZE);
1672  		data_len = PAGE_ALIGN(data_len);
1673  
1674  		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1675  	}
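	/* Editor's note (assuming 4 KiB pages, where SKB_MAX_ALLOC is
	 * roughly 16 KiB): a large datagram is built with up to
	 * SKB_MAX_ALLOC bytes of linear head and the page-aligned
	 * remainder in page frags, capped at MAX_SKB_FRAGS * PAGE_SIZE
	 * of frag space.
	 */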
1676  
1677  	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1678  				   msg->msg_flags & MSG_DONTWAIT, &err,
1679  				   PAGE_ALLOC_COSTLY_ORDER);
1680  	if (skb == NULL)
1681  		goto out;
1682  
1683  	err = unix_scm_to_skb(&scm, skb, true);
1684  	if (err < 0)
1685  		goto out_free;
1686  
1687  	skb_put(skb, len - data_len);
1688  	skb->data_len = data_len;
1689  	skb->len = len;
1690  	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1691  	if (err)
1692  		goto out_free;
1693  
1694  	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1695  
1696  restart:
1697  	if (!other) {
1698  		err = -ECONNRESET;
1699  		if (sunaddr == NULL)
1700  			goto out_free;
1701  
1702  		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1703  					hash, &err);
1704  		if (other == NULL)
1705  			goto out_free;
1706  	}
1707  
1708  	if (sk_filter(other, skb) < 0) {
1709  		/* Toss the packet but do not return any error to the sender */
1710  		err = len;
1711  		goto out_free;
1712  	}
1713  
1714  	sk_locked = 0;
1715  	unix_state_lock(other);
1716  restart_locked:
1717  	err = -EPERM;
1718  	if (!unix_may_send(sk, other))
1719  		goto out_unlock;
1720  
1721  	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1722  		/*
1723  		 *	Check with 1003.1g - what should a
1724  		 *	datagram error return here?
1725  		 */
1726  		unix_state_unlock(other);
1727  		sock_put(other);
1728  
1729  		if (!sk_locked)
1730  			unix_state_lock(sk);
1731  
1732  		err = 0;
1733  		if (unix_peer(sk) == other) {
1734  			unix_peer(sk) = NULL;
1735  			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1736  
1737  			unix_state_unlock(sk);
1738  
1739  			unix_dgram_disconnected(sk, other);
1740  			sock_put(other);
1741  			err = -ECONNREFUSED;
1742  		} else {
1743  			unix_state_unlock(sk);
1744  		}
1745  
1746  		other = NULL;
1747  		if (err)
1748  			goto out_free;
1749  		goto restart;
1750  	}
1751  
1752  	err = -EPIPE;
1753  	if (other->sk_shutdown & RCV_SHUTDOWN)
1754  		goto out_unlock;
1755  
1756  	if (sk->sk_type != SOCK_SEQPACKET) {
1757  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1758  		if (err)
1759  			goto out_unlock;
1760  	}
1761  
1762  	/* other == sk && unix_peer(other) != sk if
1763  	 * - unix_peer(sk) == NULL, destination address bound to sk
1764  	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1765  	 */
1766  	if (other != sk &&
1767  	    unlikely(unix_peer(other) != sk &&
1768  	    unix_recvq_full_lockless(other))) {
1769  		if (timeo) {
1770  			timeo = unix_wait_for_peer(other, timeo);
1771  
1772  			err = sock_intr_errno(timeo);
1773  			if (signal_pending(current))
1774  				goto out_free;
1775  
1776  			goto restart;
1777  		}
1778  
1779  		if (!sk_locked) {
1780  			unix_state_unlock(other);
1781  			unix_state_double_lock(sk, other);
1782  		}
1783  
1784  		if (unix_peer(sk) != other ||
1785  		    unix_dgram_peer_wake_me(sk, other)) {
1786  			err = -EAGAIN;
1787  			sk_locked = 1;
1788  			goto out_unlock;
1789  		}
1790  
1791  		if (!sk_locked) {
1792  			sk_locked = 1;
1793  			goto restart_locked;
1794  		}
1795  	}
1796  
1797  	if (unlikely(sk_locked))
1798  		unix_state_unlock(sk);
1799  
1800  	if (sock_flag(other, SOCK_RCVTSTAMP))
1801  		__net_timestamp(skb);
1802  	maybe_add_creds(skb, sock, other);
1803  	spin_lock(&other->sk_receive_queue.lock);
1804  	scm_stat_add(other, skb);
1805  	__skb_queue_tail(&other->sk_receive_queue, skb);
1806  	spin_unlock(&other->sk_receive_queue.lock);
1807  	unix_state_unlock(other);
1808  	other->sk_data_ready(other);
1809  	sock_put(other);
1810  	scm_destroy(&scm);
1811  	return len;
1812  
1813  out_unlock:
1814  	if (sk_locked)
1815  		unix_state_unlock(sk);
1816  	unix_state_unlock(other);
1817  out_free:
1818  	kfree_skb(skb);
1819  out:
1820  	if (other)
1821  		sock_put(other);
1822  	scm_destroy(&scm);
1823  	return err;
1824  }
1825  
1826  /* We use paged skbs for stream sockets, and limit occupancy to 32768
1827   * bytes, and a minimum of a full page.
1828   */
1829  #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
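/* Editor's arithmetic note: with 4 KiB pages get_order(32768) == 3,
 * so UNIX_SKB_FRAGS_SZ == 32 KiB; with 64 KiB pages get_order(32768)
 * == 0 and the limit is a single page, matching the "minimum of a
 * full page" above.
 */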
1830  
1831  static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1832  			       size_t len)
1833  {
1834  	struct sock *sk = sock->sk;
1835  	struct sock *other = NULL;
1836  	int err, size;
1837  	struct sk_buff *skb;
1838  	int sent = 0;
1839  	struct scm_cookie scm;
1840  	bool fds_sent = false;
1841  	int data_len;
1842  
1843  	wait_for_unix_gc();
1844  	err = scm_send(sock, msg, &scm, false);
1845  	if (err < 0)
1846  		return err;
1847  
1848  	err = -EOPNOTSUPP;
1849  	if (msg->msg_flags&MSG_OOB)
1850  		goto out_err;
1851  
1852  	if (msg->msg_namelen) {
1853  		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1854  		goto out_err;
1855  	} else {
1856  		err = -ENOTCONN;
1857  		other = unix_peer(sk);
1858  		if (!other)
1859  			goto out_err;
1860  	}
1861  
1862  	if (sk->sk_shutdown & SEND_SHUTDOWN)
1863  		goto pipe_err;
1864  
1865  	while (sent < len) {
1866  		size = len - sent;
1867  
1868  		/* Keep two messages in the pipe so it schedules better */
1869  		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1870  
1871  		/* allow fallback to order-0 allocations */
1872  		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1873  
1874  		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1875  
1876  		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1877  
1878  		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1879  					   msg->msg_flags & MSG_DONTWAIT, &err,
1880  					   get_order(UNIX_SKB_FRAGS_SZ));
1881  		if (!skb)
1882  			goto out_err;
1883  
1884  		/* Only send the fds in the first buffer */
1885  		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1886  		if (err < 0) {
1887  			kfree_skb(skb);
1888  			goto out_err;
1889  		}
1890  		fds_sent = true;
1891  
1892  		skb_put(skb, size - data_len);
1893  		skb->data_len = data_len;
1894  		skb->len = size;
1895  		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1896  		if (err) {
1897  			kfree_skb(skb);
1898  			goto out_err;
1899  		}
1900  
1901  		unix_state_lock(other);
1902  
1903  		if (sock_flag(other, SOCK_DEAD) ||
1904  		    (other->sk_shutdown & RCV_SHUTDOWN))
1905  			goto pipe_err_free;
1906  
1907  		maybe_add_creds(skb, sock, other);
1908  		spin_lock(&other->sk_receive_queue.lock);
1909  		scm_stat_add(other, skb);
1910  		__skb_queue_tail(&other->sk_receive_queue, skb);
1911  		spin_unlock(&other->sk_receive_queue.lock);
1912  		unix_state_unlock(other);
1913  		other->sk_data_ready(other);
1914  		sent += size;
1915  	}
1916  
1917  	scm_destroy(&scm);
1918  
1919  	return sent;
1920  
1921  pipe_err_free:
1922  	unix_state_unlock(other);
1923  	kfree_skb(skb);
1924  pipe_err:
1925  	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1926  		send_sig(SIGPIPE, current, 0);
1927  	err = -EPIPE;
1928  out_err:
1929  	scm_destroy(&scm);
1930  	return sent ? : err;
1931  }
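
/* Illustrative example (hypothetical userspace): writing to a stream
 * peer whose receiving end is gone takes the pipe_err path above,
 * raising SIGPIPE unless MSG_NOSIGNAL is passed:
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	close(sv[1]);
 *	if (send(sv[0], "x", 1, MSG_NOSIGNAL) < 0)
 *		perror("send");		// EPIPE, no signal delivered
 */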
1932  
1933  static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1934  				    int offset, size_t size, int flags)
1935  {
1936  	int err;
1937  	bool send_sigpipe = false;
1938  	bool init_scm = true;
1939  	struct scm_cookie scm;
1940  	struct sock *other, *sk = socket->sk;
1941  	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1942  
1943  	if (flags & MSG_OOB)
1944  		return -EOPNOTSUPP;
1945  
1946  	other = unix_peer(sk);
1947  	if (!other || sk->sk_state != TCP_ESTABLISHED)
1948  		return -ENOTCONN;
1949  
1950  	if (false) {
1951  alloc_skb:
1952  		unix_state_unlock(other);
1953  		mutex_unlock(&unix_sk(other)->iolock);
1954  		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1955  					      &err, 0);
1956  		if (!newskb)
1957  			goto err;
1958  	}
1959  
1960  	/* We must acquire the iolock, as we modify skbs already present
1961  	 * in the sk_receive_queue and mess with skb->len
1962  	 */
1963  	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1964  	if (err) {
1965  		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1966  		goto err;
1967  	}
1968  
1969  	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1970  		err = -EPIPE;
1971  		send_sigpipe = true;
1972  		goto err_unlock;
1973  	}
1974  
1975  	unix_state_lock(other);
1976  
1977  	if (sock_flag(other, SOCK_DEAD) ||
1978  	    other->sk_shutdown & RCV_SHUTDOWN) {
1979  		err = -EPIPE;
1980  		send_sigpipe = true;
1981  		goto err_state_unlock;
1982  	}
1983  
1984  	if (init_scm) {
1985  		err = maybe_init_creds(&scm, socket, other);
1986  		if (err)
1987  			goto err_state_unlock;
1988  		init_scm = false;
1989  	}
1990  
1991  	skb = skb_peek_tail(&other->sk_receive_queue);
1992  	if (tail && tail == skb) {
1993  		skb = newskb;
1994  	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1995  		if (newskb) {
1996  			skb = newskb;
1997  		} else {
1998  			tail = skb;
1999  			goto alloc_skb;
2000  		}
2001  	} else if (newskb) {
2002  		/* This is the fast path: the tail skb can be extended, so
2003  		 * the speculatively allocated newskb is not needed; freeing
2004  		 * it here is harmless even when newskb == NULL.
2005  		 */
2006  		consume_skb(newskb);
2007  		newskb = NULL;
2008  	}
2009  
2010  	if (skb_append_pagefrags(skb, page, offset, size)) {
2011  		tail = skb;
2012  		goto alloc_skb;
2013  	}
2014  
2015  	skb->len += size;
2016  	skb->data_len += size;
2017  	skb->truesize += size;
2018  	refcount_add(size, &sk->sk_wmem_alloc);
2019  
2020  	if (newskb) {
2021  		err = unix_scm_to_skb(&scm, skb, false);
2022  		if (err)
2023  			goto err_state_unlock;
2024  		spin_lock(&other->sk_receive_queue.lock);
2025  		__skb_queue_tail(&other->sk_receive_queue, newskb);
2026  		spin_unlock(&other->sk_receive_queue.lock);
2027  	}
2028  
2029  	unix_state_unlock(other);
2030  	mutex_unlock(&unix_sk(other)->iolock);
2031  
2032  	other->sk_data_ready(other);
2033  	scm_destroy(&scm);
2034  	return size;
2035  
2036  err_state_unlock:
2037  	unix_state_unlock(other);
2038  err_unlock:
2039  	mutex_unlock(&unix_sk(other)->iolock);
2040  err:
2041  	kfree_skb(newskb);
2042  	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2043  		send_sig(SIGPIPE, current, 0);
2044  	if (!init_scm)
2045  		scm_destroy(&scm);
2046  	return err;
2047  }
2048  
2049  static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2050  				  size_t len)
2051  {
2052  	int err;
2053  	struct sock *sk = sock->sk;
2054  
2055  	err = sock_error(sk);
2056  	if (err)
2057  		return err;
2058  
2059  	if (sk->sk_state != TCP_ESTABLISHED)
2060  		return -ENOTCONN;
2061  
2062  	if (msg->msg_namelen)
2063  		msg->msg_namelen = 0;
2064  
2065  	return unix_dgram_sendmsg(sock, msg, len);
2066  }
2067  
2068  static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2069  				  size_t size, int flags)
2070  {
2071  	struct sock *sk = sock->sk;
2072  
2073  	if (sk->sk_state != TCP_ESTABLISHED)
2074  		return -ENOTCONN;
2075  
2076  	return unix_dgram_recvmsg(sock, msg, size, flags);
2077  }
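
/* SOCK_SEQPACKET rides on the datagram paths above but requires an
 * established connection and preserves record boundaries. Illustrative
 * (hypothetical) round trip:
 *
 *	int sv[2];
 *	char buf[8];
 *	socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 *	send(sv[0], "ab", 2, 0);
 *	send(sv[0], "cd", 2, 0);
 *	recv(sv[1], buf, sizeof(buf), 0);	// returns 2: "ab" only
 */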
2078  
2079  static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2080  {
2081  	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2082  
2083  	if (addr) {
2084  		msg->msg_namelen = addr->len;
2085  		memcpy(msg->msg_name, addr->name, addr->len);
2086  	}
2087  }
2088  
2089  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2090  			      size_t size, int flags)
2091  {
2092  	struct scm_cookie scm;
2093  	struct sock *sk = sock->sk;
2094  	struct unix_sock *u = unix_sk(sk);
2095  	struct sk_buff *skb, *last;
2096  	long timeo;
2097  	int skip;
2098  	int err;
2099  
2100  	err = -EOPNOTSUPP;
2101  	if (flags&MSG_OOB)
2102  		goto out;
2103  
2104  	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2105  
2106  	do {
2107  		mutex_lock(&u->iolock);
2108  
2109  		skip = sk_peek_offset(sk, flags);
2110  		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2111  					      scm_stat_del, &skip, &err, &last);
2112  		if (skb)
2113  			break;
2114  
2115  		mutex_unlock(&u->iolock);
2116  
2117  		if (err != -EAGAIN)
2118  			break;
2119  	} while (timeo &&
2120  		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2121  					      &err, &timeo, last));
2122  
2123  	if (!skb) { /* implies iolock unlocked */
2124  		unix_state_lock(sk);
2125  		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2126  		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2127  		    (sk->sk_shutdown & RCV_SHUTDOWN))
2128  			err = 0;
2129  		unix_state_unlock(sk);
2130  		goto out;
2131  	}
2132  
2133  	if (wq_has_sleeper(&u->peer_wait))
2134  		wake_up_interruptible_sync_poll(&u->peer_wait,
2135  						EPOLLOUT | EPOLLWRNORM |
2136  						EPOLLWRBAND);
2137  
2138  	if (msg->msg_name)
2139  		unix_copy_addr(msg, skb->sk);
2140  
2141  	if (size > skb->len - skip)
2142  		size = skb->len - skip;
2143  	else if (size < skb->len - skip)
2144  		msg->msg_flags |= MSG_TRUNC;
2145  
2146  	err = skb_copy_datagram_msg(skb, skip, msg, size);
2147  	if (err)
2148  		goto out_free;
2149  
2150  	if (sock_flag(sk, SOCK_RCVTSTAMP))
2151  		__sock_recv_timestamp(msg, sk, skb);
2152  
2153  	memset(&scm, 0, sizeof(scm));
2154  
2155  	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2156  	unix_set_secdata(&scm, skb);
2157  
2158  	if (!(flags & MSG_PEEK)) {
2159  		if (UNIXCB(skb).fp)
2160  			unix_detach_fds(&scm, skb);
2161  
2162  		sk_peek_offset_bwd(sk, skb->len);
2163  	} else {
2164  		/* It is questionable what to do on PEEK. We could:
2165  		   - not return fds: good, but too simple 8)
2166  		   - return fds, and then not return them on read (the old
2167  		     strategy, apparently wrong)
2168  		   - clone fds (chosen for now, as the most universal
2169  		     solution)
2170  
2171  		   POSIX 1003.1g does not actually define this clearly
2172  		   at all. Then again, POSIX 1003.1g doesn't define a lot
2173  		   of things clearly!
2174  
2175  		*/
2176  
2177  		sk_peek_offset_fwd(sk, size);
2178  
2179  		if (UNIXCB(skb).fp)
2180  			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2181  	}
2182  	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2183  
2184  	scm_recv(sock, msg, &scm, flags);
2185  
2186  out_free:
2187  	skb_free_datagram(sk, skb);
2188  	mutex_unlock(&u->iolock);
2189  out:
2190  	return err;
2191  }
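
/* A hypothetical sketch of the MSG_TRUNC handling above: a too-small
 * buffer truncates the datagram, and passing MSG_TRUNC in flags makes
 * the call return the full length rather than the copied length:
 *
 *	char buf[4];
 *	ssize_t n = recv(fd, buf, sizeof(buf), MSG_TRUNC);
 *	// for an 8-byte datagram: n == 8, only 4 bytes were copied
 */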
2192  
2193  /*
2194   *	Sleep until more data has arrived. But check for races..
2195   */
2196  static long unix_stream_data_wait(struct sock *sk, long timeo,
2197  				  struct sk_buff *last, unsigned int last_len,
2198  				  bool freezable)
2199  {
2200  	struct sk_buff *tail;
2201  	DEFINE_WAIT(wait);
2202  
2203  	unix_state_lock(sk);
2204  
2205  	for (;;) {
2206  		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2207  
2208  		tail = skb_peek_tail(&sk->sk_receive_queue);
2209  		if (tail != last ||
2210  		    (tail && tail->len != last_len) ||
2211  		    sk->sk_err ||
2212  		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2213  		    signal_pending(current) ||
2214  		    !timeo)
2215  			break;
2216  
2217  		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2218  		unix_state_unlock(sk);
2219  		if (freezable)
2220  			timeo = freezable_schedule_timeout(timeo);
2221  		else
2222  			timeo = schedule_timeout(timeo);
2223  		unix_state_lock(sk);
2224  
2225  		if (sock_flag(sk, SOCK_DEAD))
2226  			break;
2227  
2228  		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2229  	}
2230  
2231  	finish_wait(sk_sleep(sk), &wait);
2232  	unix_state_unlock(sk);
2233  	return timeo;
2234  }
2235  
2236  static unsigned int unix_skb_len(const struct sk_buff *skb)
2237  {
2238  	return skb->len - UNIXCB(skb).consumed;
2239  }
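
/* For example, if a 100-byte skb has had 60 bytes read without
 * MSG_PEEK, UNIXCB(skb).consumed is 60 and unix_skb_len() reports 40;
 * the skb is only unlinked from the queue once unix_skb_len() reaches
 * zero.
 */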
2240  
2241  struct unix_stream_read_state {
2242  	int (*recv_actor)(struct sk_buff *, int, int,
2243  			  struct unix_stream_read_state *);
2244  	struct socket *socket;
2245  	struct msghdr *msg;
2246  	struct pipe_inode_info *pipe;
2247  	size_t size;
2248  	int flags;
2249  	unsigned int splice_flags;
2250  };
2251  
2252  static int unix_stream_read_generic(struct unix_stream_read_state *state,
2253  				    bool freezable)
2254  {
2255  	struct scm_cookie scm;
2256  	struct socket *sock = state->socket;
2257  	struct sock *sk = sock->sk;
2258  	struct unix_sock *u = unix_sk(sk);
2259  	int copied = 0;
2260  	int flags = state->flags;
2261  	int noblock = flags & MSG_DONTWAIT;
2262  	bool check_creds = false;
2263  	int target;
2264  	int err = 0;
2265  	long timeo;
2266  	int skip;
2267  	size_t size = state->size;
2268  	unsigned int last_len;
2269  
2270  	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2271  		err = -EINVAL;
2272  		goto out;
2273  	}
2274  
2275  	if (unlikely(flags & MSG_OOB)) {
2276  		err = -EOPNOTSUPP;
2277  		goto out;
2278  	}
2279  
2280  	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2281  	timeo = sock_rcvtimeo(sk, noblock);
2282  
2283  	memset(&scm, 0, sizeof(scm));
2284  
2285  	/* Lock the socket to prevent the queue from being reordered
2286  	 * while we sleep copying data out to the message
2287  	 */
2288  	mutex_lock(&u->iolock);
2289  
2290  	skip = max(sk_peek_offset(sk, flags), 0);
2291  
2292  	do {
2293  		int chunk;
2294  		bool drop_skb;
2295  		struct sk_buff *skb, *last;
2296  
2297  redo:
2298  		unix_state_lock(sk);
2299  		if (sock_flag(sk, SOCK_DEAD)) {
2300  			err = -ECONNRESET;
2301  			goto unlock;
2302  		}
2303  		last = skb = skb_peek(&sk->sk_receive_queue);
2304  		last_len = last ? last->len : 0;
2305  again:
2306  		if (skb == NULL) {
2307  			if (copied >= target)
2308  				goto unlock;
2309  
2310  			/*
2311  			 *	POSIX 1003.1g mandates this order.
2312  			 */
2313  
2314  			err = sock_error(sk);
2315  			if (err)
2316  				goto unlock;
2317  			if (sk->sk_shutdown & RCV_SHUTDOWN)
2318  				goto unlock;
2319  
2320  			unix_state_unlock(sk);
2321  			if (!timeo) {
2322  				err = -EAGAIN;
2323  				break;
2324  			}
2325  
2326  			mutex_unlock(&u->iolock);
2327  
2328  			timeo = unix_stream_data_wait(sk, timeo, last,
2329  						      last_len, freezable);
2330  
2331  			if (signal_pending(current)) {
2332  				err = sock_intr_errno(timeo);
2333  				scm_destroy(&scm);
2334  				goto out;
2335  			}
2336  
2337  			mutex_lock(&u->iolock);
2338  			goto redo;
2339  unlock:
2340  			unix_state_unlock(sk);
2341  			break;
2342  		}
2343  
2344  		while (skip >= unix_skb_len(skb)) {
2345  			skip -= unix_skb_len(skb);
2346  			last = skb;
2347  			last_len = skb->len;
2348  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2349  			if (!skb)
2350  				goto again;
2351  		}
2352  
2353  		unix_state_unlock(sk);
2354  
2355  		if (check_creds) {
2356  			/* Never glue messages from different writers */
2357  			if (!unix_skb_scm_eq(skb, &scm))
2358  				break;
2359  		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2360  			/* Copy credentials */
2361  			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2362  			unix_set_secdata(&scm, skb);
2363  			check_creds = true;
2364  		}
2365  
2366  		/* Copy address just once */
2367  		if (state->msg && state->msg->msg_name) {
2368  			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2369  					 state->msg->msg_name);
2370  			unix_copy_addr(state->msg, skb->sk);
2371  			sunaddr = NULL;
2372  		}
2373  
2374  		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2375  		skb_get(skb);
2376  		chunk = state->recv_actor(skb, skip, chunk, state);
2377  		drop_skb = !unix_skb_len(skb);
2378  		/* skb is only safe to use if !drop_skb */
2379  		consume_skb(skb);
2380  		if (chunk < 0) {
2381  			if (copied == 0)
2382  				copied = -EFAULT;
2383  			break;
2384  		}
2385  		copied += chunk;
2386  		size -= chunk;
2387  
2388  		if (drop_skb) {
2389  			/* The skb was consumed by a concurrent reader;
2390  			 * nothing more can be expected from it, so treat
2391  			 * it as invalid - it has certainly been dropped
2392  			 * from the socket queue by now.
2393  			 *
2394  			 * Report a short read.
2395  			 */
2396  			err = 0;
2397  			break;
2398  		}
2399  
2400  		/* Mark read part of skb as used */
2401  		if (!(flags & MSG_PEEK)) {
2402  			UNIXCB(skb).consumed += chunk;
2403  
2404  			sk_peek_offset_bwd(sk, chunk);
2405  
2406  			if (UNIXCB(skb).fp) {
2407  				spin_lock(&sk->sk_receive_queue.lock);
2408  				scm_stat_del(sk, skb);
2409  				spin_unlock(&sk->sk_receive_queue.lock);
2410  				unix_detach_fds(&scm, skb);
2411  			}
2412  
2413  			if (unix_skb_len(skb))
2414  				break;
2415  
2416  			skb_unlink(skb, &sk->sk_receive_queue);
2417  			consume_skb(skb);
2418  
2419  			if (scm.fp)
2420  				break;
2421  		} else {
2422  			/* It is questionable; see the note in unix_dgram_recvmsg.
2423  			 */
2424  			if (UNIXCB(skb).fp)
2425  				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2426  
2427  			sk_peek_offset_fwd(sk, chunk);
2428  
2429  			if (UNIXCB(skb).fp)
2430  				break;
2431  
2432  			skip = 0;
2433  			last = skb;
2434  			last_len = skb->len;
2435  			unix_state_lock(sk);
2436  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2437  			if (skb)
2438  				goto again;
2439  			unix_state_unlock(sk);
2440  			break;
2441  		}
2442  	} while (size);
2443  
2444  	mutex_unlock(&u->iolock);
2445  	if (state->msg)
2446  		scm_recv(sock, state->msg, &scm, flags);
2447  	else
2448  		scm_destroy(&scm);
2449  out:
2450  	return copied ? : err;
2451  }
2452  
2453  static int unix_stream_read_actor(struct sk_buff *skb,
2454  				  int skip, int chunk,
2455  				  struct unix_stream_read_state *state)
2456  {
2457  	int ret;
2458  
2459  	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2460  				    state->msg, chunk);
2461  	return ret ?: chunk;
2462  }
2463  
2464  static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2465  			       size_t size, int flags)
2466  {
2467  	struct unix_stream_read_state state = {
2468  		.recv_actor = unix_stream_read_actor,
2469  		.socket = sock,
2470  		.msg = msg,
2471  		.size = size,
2472  		.flags = flags
2473  	};
2474  
2475  	return unix_stream_read_generic(&state, true);
2476  }
2477  
2478  static int unix_stream_splice_actor(struct sk_buff *skb,
2479  				    int skip, int chunk,
2480  				    struct unix_stream_read_state *state)
2481  {
2482  	return skb_splice_bits(skb, state->socket->sk,
2483  			       UNIXCB(skb).consumed + skip,
2484  			       state->pipe, chunk, state->splice_flags);
2485  }
2486  
2487  static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2488  				       struct pipe_inode_info *pipe,
2489  				       size_t size, unsigned int flags)
2490  {
2491  	struct unix_stream_read_state state = {
2492  		.recv_actor = unix_stream_splice_actor,
2493  		.socket = sock,
2494  		.pipe = pipe,
2495  		.size = size,
2496  		.splice_flags = flags,
2497  	};
2498  
2499  	if (unlikely(*ppos))
2500  		return -ESPIPE;
2501  
2502  	if (sock->file->f_flags & O_NONBLOCK ||
2503  	    flags & SPLICE_F_NONBLOCK)
2504  		state.flags = MSG_DONTWAIT;
2505  
2506  	return unix_stream_read_generic(&state, false);
2507  }
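
/* Hypothetical usage: splicing from a unix stream socket honours both
 * O_NONBLOCK on the socket and SPLICE_F_NONBLOCK, either of which maps
 * to MSG_DONTWAIT above:
 *
 *	splice(sock_fd, NULL, pipe_fd, NULL, 4096, SPLICE_F_NONBLOCK);
 */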
2508  
2509  static int unix_shutdown(struct socket *sock, int mode)
2510  {
2511  	struct sock *sk = sock->sk;
2512  	struct sock *other;
2513  
2514  	if (mode < SHUT_RD || mode > SHUT_RDWR)
2515  		return -EINVAL;
2516  	/* This maps:
2517  	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2518  	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2519  	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2520  	 */
2521  	++mode;
2522  
2523  	unix_state_lock(sk);
2524  	sk->sk_shutdown |= mode;
2525  	other = unix_peer(sk);
2526  	if (other)
2527  		sock_hold(other);
2528  	unix_state_unlock(sk);
2529  	sk->sk_state_change(sk);
2530  
2531  	if (other &&
2532  		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2533  
2534  		int peer_mode = 0;
2535  
2536  		if (mode&RCV_SHUTDOWN)
2537  			peer_mode |= SEND_SHUTDOWN;
2538  		if (mode&SEND_SHUTDOWN)
2539  			peer_mode |= RCV_SHUTDOWN;
2540  		unix_state_lock(other);
2541  		other->sk_shutdown |= peer_mode;
2542  		unix_state_unlock(other);
2543  		other->sk_state_change(other);
2544  		if (peer_mode == SHUTDOWN_MASK)
2545  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2546  		else if (peer_mode & RCV_SHUTDOWN)
2547  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2548  	}
2549  	if (other)
2550  		sock_put(other);
2551  
2552  	return 0;
2553  }
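
/* Illustrative example (hypothetical userspace) of the mapping above:
 * shutting down the send side of one end surfaces as EOF on the other:
 *
 *	int sv[2];
 *	char c;
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);	// SEND_SHUTDOWN here,
 *					// RCV_SHUTDOWN on the peer
 *	read(sv[1], &c, 1);		// returns 0 (EOF) immediately
 */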
2554  
2555  long unix_inq_len(struct sock *sk)
2556  {
2557  	struct sk_buff *skb;
2558  	long amount = 0;
2559  
2560  	if (sk->sk_state == TCP_LISTEN)
2561  		return -EINVAL;
2562  
2563  	spin_lock(&sk->sk_receive_queue.lock);
2564  	if (sk->sk_type == SOCK_STREAM ||
2565  	    sk->sk_type == SOCK_SEQPACKET) {
2566  		skb_queue_walk(&sk->sk_receive_queue, skb)
2567  			amount += unix_skb_len(skb);
2568  	} else {
2569  		skb = skb_peek(&sk->sk_receive_queue);
2570  		if (skb)
2571  			amount = skb->len;
2572  	}
2573  	spin_unlock(&sk->sk_receive_queue.lock);
2574  
2575  	return amount;
2576  }
2577  EXPORT_SYMBOL_GPL(unix_inq_len);
2578  
2579  long unix_outq_len(struct sock *sk)
2580  {
2581  	return sk_wmem_alloc_get(sk);
2582  }
2583  EXPORT_SYMBOL_GPL(unix_outq_len);
2584  
2585  static int unix_open_file(struct sock *sk)
2586  {
2587  	struct path path;
2588  	struct file *f;
2589  	int fd;
2590  
2591  	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2592  		return -EPERM;
2593  
2594  	if (!smp_load_acquire(&unix_sk(sk)->addr))
2595  		return -ENOENT;
2596  
2597  	path = unix_sk(sk)->path;
2598  	if (!path.dentry)
2599  		return -ENOENT;
2600  
2601  	path_get(&path);
2602  
2603  	fd = get_unused_fd_flags(O_CLOEXEC);
2604  	if (fd < 0)
2605  		goto out;
2606  
2607  	f = dentry_open(&path, O_PATH, current_cred());
2608  	if (IS_ERR(f)) {
2609  		put_unused_fd(fd);
2610  		fd = PTR_ERR(f);
2611  		goto out;
2612  	}
2613  
2614  	fd_install(fd, f);
2615  out:
2616  	path_put(&path);
2617  
2618  	return fd;
2619  }
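
/* unix_open_file() backs the SIOCUNIXFILE ioctl below; the ioctl return
 * value is itself a fresh O_PATH descriptor for the bound path. A
 * hypothetical caller (needs CAP_NET_ADMIN):
 *
 *	int pfd = ioctl(sock_fd, SIOCUNIXFILE);
 *	// pfd can then be fstat()ed or resolved via /proc/self/fd
 */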
2620  
2621  static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2622  {
2623  	struct sock *sk = sock->sk;
2624  	long amount = 0;
2625  	int err;
2626  
2627  	switch (cmd) {
2628  	case SIOCOUTQ:
2629  		amount = unix_outq_len(sk);
2630  		err = put_user(amount, (int __user *)arg);
2631  		break;
2632  	case SIOCINQ:
2633  		amount = unix_inq_len(sk);
2634  		if (amount < 0)
2635  			err = amount;
2636  		else
2637  			err = put_user(amount, (int __user *)arg);
2638  		break;
2639  	case SIOCUNIXFILE:
2640  		err = unix_open_file(sk);
2641  		break;
2642  	default:
2643  		err = -ENOIOCTLCMD;
2644  		break;
2645  	}
2646  	return err;
2647  }
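
/* Note on SIOCINQ semantics (see unix_inq_len() above): for stream and
 * seqpacket sockets it reports the total unread bytes; for SOCK_DGRAM,
 * only the size of the next datagram. Hypothetical usage:
 *
 *	int queued;
 *	ioctl(fd, SIOCINQ, &queued);
 */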
2648  
2649  #ifdef CONFIG_COMPAT
2650  static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2651  {
2652  	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2653  }
2654  #endif
2655  
2656  static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2657  {
2658  	struct sock *sk = sock->sk;
2659  	__poll_t mask;
2660  
2661  	sock_poll_wait(file, sock, wait);
2662  	mask = 0;
2663  
2664  	/* exceptional events? */
2665  	if (sk->sk_err)
2666  		mask |= EPOLLERR;
2667  	if (sk->sk_shutdown == SHUTDOWN_MASK)
2668  		mask |= EPOLLHUP;
2669  	if (sk->sk_shutdown & RCV_SHUTDOWN)
2670  		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2671  
2672  	/* readable? */
2673  	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2674  		mask |= EPOLLIN | EPOLLRDNORM;
2675  
2676  	/* Connection-based sockets need to check for termination and startup */
2677  	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2678  	    sk->sk_state == TCP_CLOSE)
2679  		mask |= EPOLLHUP;
2680  
2681  	/*
2682  	 * We also report writable when the other side has shut down the
2683  	 * connection. This prevents sockets from getting stuck.
2684  	 */
2685  	if (unix_writable(sk))
2686  		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2687  
2688  	return mask;
2689  }
2690  
2691  static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2692  				    poll_table *wait)
2693  {
2694  	struct sock *sk = sock->sk, *other;
2695  	unsigned int writable;
2696  	__poll_t mask;
2697  
2698  	sock_poll_wait(file, sock, wait);
2699  	mask = 0;
2700  
2701  	/* exceptional events? */
2702  	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2703  		mask |= EPOLLERR |
2704  			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2705  
2706  	if (sk->sk_shutdown & RCV_SHUTDOWN)
2707  		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2708  	if (sk->sk_shutdown == SHUTDOWN_MASK)
2709  		mask |= EPOLLHUP;
2710  
2711  	/* readable? */
2712  	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2713  		mask |= EPOLLIN | EPOLLRDNORM;
2714  
2715  	/* Connection-based sockets need to check for termination and startup */
2716  	if (sk->sk_type == SOCK_SEQPACKET) {
2717  		if (sk->sk_state == TCP_CLOSE)
2718  			mask |= EPOLLHUP;
2719  		/* connection hasn't started yet? */
2720  		if (sk->sk_state == TCP_SYN_SENT)
2721  			return mask;
2722  	}
2723  
2724  	/* No write status requested, avoid expensive OUT tests. */
2725  	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2726  		return mask;
2727  
2728  	writable = unix_writable(sk);
2729  	if (writable) {
2730  		unix_state_lock(sk);
2731  
2732  		other = unix_peer(sk);
2733  		if (other && unix_peer(other) != sk &&
2734  		    unix_recvq_full(other) &&
2735  		    unix_dgram_peer_wake_me(sk, other))
2736  			writable = 0;
2737  
2738  		unix_state_unlock(sk);
2739  	}
2740  
2741  	if (writable)
2742  		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2743  	else
2744  		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2745  
2746  	return mask;
2747  }
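
/* Hypothetical sketch: the peer-wake logic above lets a nonblocking
 * datagram sender wait for space in the peer's receive queue:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	// wakes once the receiver drains its queue
 */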
2748  
2749  #ifdef CONFIG_PROC_FS
2750  
2751  #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2752  
2753  #define get_bucket(x) ((x) >> BUCKET_SPACE)
2754  #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2755  #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
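
/* For example, assuming UNIX_HASH_BITS == 8 on a 64-bit kernel,
 * BUCKET_SPACE is 64 - 9 - 1 == 54, so set_bucket_offset(3, 5) encodes
 * "bucket 3, 5th socket" in a single loff_t seq-file position.
 */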
2756  
2757  static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2758  {
2759  	unsigned long offset = get_offset(*pos);
2760  	unsigned long bucket = get_bucket(*pos);
2761  	struct sock *sk;
2762  	unsigned long count = 0;
2763  
2764  	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2765  		if (sock_net(sk) != seq_file_net(seq))
2766  			continue;
2767  		if (++count == offset)
2768  			break;
2769  	}
2770  
2771  	return sk;
2772  }
2773  
2774  static struct sock *unix_next_socket(struct seq_file *seq,
2775  				     struct sock *sk,
2776  				     loff_t *pos)
2777  {
2778  	unsigned long bucket;
2779  
2780  	while (sk > (struct sock *)SEQ_START_TOKEN) {
2781  		sk = sk_next(sk);
2782  		if (!sk)
2783  			goto next_bucket;
2784  		if (sock_net(sk) == seq_file_net(seq))
2785  			return sk;
2786  	}
2787  
2788  	do {
2789  		sk = unix_from_bucket(seq, pos);
2790  		if (sk)
2791  			return sk;
2792  
2793  next_bucket:
2794  		bucket = get_bucket(*pos) + 1;
2795  		*pos = set_bucket_offset(bucket, 1);
2796  	} while (bucket < ARRAY_SIZE(unix_socket_table));
2797  
2798  	return NULL;
2799  }
2800  
2801  static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2802  	__acquires(unix_table_lock)
2803  {
2804  	spin_lock(&unix_table_lock);
2805  
2806  	if (!*pos)
2807  		return SEQ_START_TOKEN;
2808  
2809  	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2810  		return NULL;
2811  
2812  	return unix_next_socket(seq, NULL, pos);
2813  }
2814  
2815  static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2816  {
2817  	++*pos;
2818  	return unix_next_socket(seq, v, pos);
2819  }
2820  
2821  static void unix_seq_stop(struct seq_file *seq, void *v)
2822  	__releases(unix_table_lock)
2823  {
2824  	spin_unlock(&unix_table_lock);
2825  }
2826  
2827  static int unix_seq_show(struct seq_file *seq, void *v)
2828  {
2829  
2830  	if (v == SEQ_START_TOKEN)
2831  		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2832  			 "Inode Path\n");
2833  	else {
2834  		struct sock *s = v;
2835  		struct unix_sock *u = unix_sk(s);
2836  		unix_state_lock(s);
2837  
2838  		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2839  			s,
2840  			refcount_read(&s->sk_refcnt),
2841  			0,
2842  			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2843  			s->sk_type,
2844  			s->sk_socket ?
2845  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2846  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2847  			sock_i_ino(s));
2848  
2849  		if (u->addr) {	/* under unix_table_lock here */
2850  			int i, len;
2851  			seq_putc(seq, ' ');
2852  
2853  			i = 0;
2854  			len = u->addr->len - sizeof(short);
2855  			if (!UNIX_ABSTRACT(s))
2856  				len--;
2857  			else {
2858  				seq_putc(seq, '@');
2859  				i++;
2860  			}
2861  			for ( ; i < len; i++)
2862  				seq_putc(seq, u->addr->name->sun_path[i] ?:
2863  					 '@');
2864  		}
2865  		unix_state_unlock(s);
2866  		seq_putc(seq, '\n');
2867  	}
2868  
2869  	return 0;
2870  }
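
/* An illustrative /proc/net/unix line as produced above (field values
 * are hypothetical):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 28271 /run/x.sock
 *
 * i.e. address, refcount, protocol (always 0), flags (__SO_ACCEPTCON
 * for listeners), type, state, inode, then the optionally '@'-prefixed
 * path.
 */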
2871  
2872  static const struct seq_operations unix_seq_ops = {
2873  	.start  = unix_seq_start,
2874  	.next   = unix_seq_next,
2875  	.stop   = unix_seq_stop,
2876  	.show   = unix_seq_show,
2877  };
2878  #endif
2879  
2880  static const struct net_proto_family unix_family_ops = {
2881  	.family = PF_UNIX,
2882  	.create = unix_create,
2883  	.owner	= THIS_MODULE,
2884  };
2885  
2886  
2887  static int __net_init unix_net_init(struct net *net)
2888  {
2889  	int error = -ENOMEM;
2890  
2891  	net->unx.sysctl_max_dgram_qlen = 10;
2892  	if (unix_sysctl_register(net))
2893  		goto out;
2894  
2895  #ifdef CONFIG_PROC_FS
2896  	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2897  			sizeof(struct seq_net_private))) {
2898  		unix_sysctl_unregister(net);
2899  		goto out;
2900  	}
2901  #endif
2902  	error = 0;
2903  out:
2904  	return error;
2905  }
2906  
2907  static void __net_exit unix_net_exit(struct net *net)
2908  {
2909  	unix_sysctl_unregister(net);
2910  	remove_proc_entry("unix", net->proc_net);
2911  }
2912  
2913  static struct pernet_operations unix_net_ops = {
2914  	.init = unix_net_init,
2915  	.exit = unix_net_exit,
2916  };
2917  
2918  static int __init af_unix_init(void)
2919  {
2920  	int rc = -1;
2921  
2922  	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2923  
2924  	rc = proto_register(&unix_proto, 1);
2925  	if (rc != 0) {
2926  		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2927  		goto out;
2928  	}
2929  
2930  	sock_register(&unix_family_ops);
2931  	register_pernet_subsys(&unix_net_ops);
2932  out:
2933  	return rc;
2934  }
2935  
2936  static void __exit af_unix_exit(void)
2937  {
2938  	sock_unregister(PF_UNIX);
2939  	proto_unregister(&unix_proto);
2940  	unregister_pernet_subsys(&unix_net_ops);
2941  }
2942  
2943  /* Earlier than device_initcall() so that other drivers invoking
2944     request_module() don't end up in a loop when modprobe tries
2945     to use a UNIX socket. But later than subsys_initcall() because
2946     we depend on infrastructure initialised there. */
2947  fs_initcall(af_unix_init);
2948  module_exit(af_unix_exit);
2949  
2950  MODULE_LICENSE("GPL");
2951  MODULE_ALIAS_NETPROTO(PF_UNIX);
2952