xref: /linux/net/unix/af_unix.c (revision 6dd9379e8f327e70d182b15be3ba21aa2b5d2cba)
1  /*
2   * NET4:	Implementation of BSD Unix domain sockets.
3   *
4   * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5   *
6   *		This program is free software; you can redistribute it and/or
7   *		modify it under the terms of the GNU General Public License
8   *		as published by the Free Software Foundation; either version
9   *		2 of the License, or (at your option) any later version.
10   *
11   * Fixes:
12   *		Linus Torvalds	:	Assorted bug cures.
13   *		Niibe Yutaka	:	async I/O support.
14   *		Carsten Paeth	:	PF_UNIX check, address fixes.
15   *		Alan Cox	:	Limit size of allocated blocks.
16   *		Alan Cox	:	Fixed the stupid socketpair bug.
17   *		Alan Cox	:	BSD compatibility fine tuning.
18   *		Alan Cox	:	Fixed a bug in connect when interrupted.
19   *		Alan Cox	:	Sorted out a proper draft version of
20   *					file descriptor passing hacked up from
21   *					Mike Shaver's work.
22   *		Marty Leisner	:	Fixes to fd passing
23   *		Nick Nevin	:	recvmsg bugfix.
24   *		Alan Cox	:	Started proper garbage collector
25   *		Heiko Eißfeldt	:	Missing verify_area check
26   *		Alan Cox	:	Started POSIXisms
27   *		Andreas Schwab	:	Replace inode by dentry for proper
28   *					reference counting
29   *		Kirk Petersen	:	Made this a module
30   *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31   *					Lots of bug fixes.
32   *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33   *					by the above two patches.
34   *	     Andrea Arcangeli	:	If possible we block in connect(2)
35   *					if the max backlog of the listen socket
36   *					has been reached. This won't break
37   *					old apps and it avoids a huge number
38   *					of socks being hashed (for unix_gc()
39   *					performance reasons).
40   *					Security fix that limits the max
41   *					number of socks to 2*max_files and
42   *					the number of skb queueable in the
43   *					dgram receiver.
44   *		Artur Skawina   :	Hash function optimizations
45   *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46   *	      Malcolm Beattie   :	Set peercred for socketpair
47   *	     Michal Ostrowski   :       Module initialization cleanup.
48   *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49   *	     				the core infrastructure is doing that
50   *	     				for all net proto families now (2.5.69+)
51   *
52   *
53   * Known differences from reference BSD that was tested:
54   *
55   *	[TO FIX]
56   *	ECONNREFUSED is not returned from one end of a connected socket to the
57   *		other the moment one end closes.
58   *	fstat() doesn't return st_dev=0; it gives the blksize as the high water mark
59   *		and a fake inode identifier (nor the BSD fstat-the-first-socket-twice bug).
60   *	[NOT TO FIX]
61   *	accept() returns a path name even if the connecting socket has closed
62   *		in the meantime (BSD loses the path and gives up).
63   *	accept() returns 0 length path for an unbound connector. BSD returns 16
64   *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65   *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66   *	BSD af_unix apparently has a connect that forgets to block properly.
67   *		(need to check this with the POSIX spec in detail)
68   *
69   * Differences from 2.0.0-11-... (ANK)
70   *	Bug fixes and improvements.
71   *		- client shutdown killed server socket.
72   *		- removed all useless cli/sti pairs.
73   *
74   *	Semantic changes/extensions.
75   *		- generic control message passing.
76   *		- SCM_CREDENTIALS control message.
77   *		- "Abstract" (not FS based) socket bindings.
78   *		  Abstract names are sequences of bytes (not zero terminated)
79   *		  starting with a zero byte, so that this name space does not
80   *		  intersect with BSD names.
81   */
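/*
 * Editorial sketch (not part of the original file): how the two name
 * spaces above look from userspace. The abstract form is Linux-specific;
 * the leading zero byte keeps it out of the BSD pathname namespace.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	// Filesystem namespace: zero-terminated path, visible to ls(1).
 *	strcpy(a.sun_path, "/tmp/demo.sock");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract namespace: sun_path[0] == '\0', the name is the raw
 *	// byte sequence "demo"; pass the exact length, no trailing NUL.
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	memcpy(a.sun_path + 1, "demo", 4);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */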
82  
83  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84  
85  #include <linux/module.h>
86  #include <linux/kernel.h>
87  #include <linux/signal.h>
88  #include <linux/sched.h>
89  #include <linux/errno.h>
90  #include <linux/string.h>
91  #include <linux/stat.h>
92  #include <linux/dcache.h>
93  #include <linux/namei.h>
94  #include <linux/socket.h>
95  #include <linux/un.h>
96  #include <linux/fcntl.h>
97  #include <linux/termios.h>
98  #include <linux/sockios.h>
99  #include <linux/net.h>
100  #include <linux/in.h>
101  #include <linux/fs.h>
102  #include <linux/slab.h>
103  #include <asm/uaccess.h>
104  #include <linux/skbuff.h>
105  #include <linux/netdevice.h>
106  #include <net/net_namespace.h>
107  #include <net/sock.h>
108  #include <net/tcp_states.h>
109  #include <net/af_unix.h>
110  #include <linux/proc_fs.h>
111  #include <linux/seq_file.h>
112  #include <net/scm.h>
113  #include <linux/init.h>
114  #include <linux/poll.h>
115  #include <linux/rtnetlink.h>
116  #include <linux/mount.h>
117  #include <net/checksum.h>
118  #include <linux/security.h>
119  #include <linux/freezer.h>
120  
121  struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122  EXPORT_SYMBOL_GPL(unix_socket_table);
123  DEFINE_SPINLOCK(unix_table_lock);
124  EXPORT_SYMBOL_GPL(unix_table_lock);
125  static atomic_long_t unix_nr_socks;
126  
127  
128  static struct hlist_head *unix_sockets_unbound(void *addr)
129  {
130  	unsigned long hash = (unsigned long)addr;
131  
132  	hash ^= hash >> 16;
133  	hash ^= hash >> 8;
134  	hash %= UNIX_HASH_SIZE;
135  	return &unix_socket_table[UNIX_HASH_SIZE + hash];
136  }
137  
138  #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
139  
140  #ifdef CONFIG_SECURITY_NETWORK
141  static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142  {
143  	UNIXCB(skb).secid = scm->secid;
144  }
145  
146  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147  {
148  	scm->secid = UNIXCB(skb).secid;
149  }
150  
151  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
152  {
153  	return (scm->secid == UNIXCB(skb).secid);
154  }
155  #else
156  static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
157  { }
158  
159  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
160  { }
161  
162  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
163  {
164  	return true;
165  }
166  #endif /* CONFIG_SECURITY_NETWORK */
167  
168  /*
169   *  SMP locking strategy:
170   *    hash table is protected with spinlock unix_table_lock
171   *    each socket state is protected by separate spin lock.
172   */
173  
174  static inline unsigned int unix_hash_fold(__wsum n)
175  {
176  	unsigned int hash = (__force unsigned int)csum_fold(n);
177  
178  	hash ^= hash>>8;
179  	return hash&(UNIX_HASH_SIZE-1);
180  }
181  
182  #define unix_peer(sk) (unix_sk(sk)->peer)
183  
184  static inline int unix_our_peer(struct sock *sk, struct sock *osk)
185  {
186  	return unix_peer(osk) == sk;
187  }
188  
189  static inline int unix_may_send(struct sock *sk, struct sock *osk)
190  {
191  	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
192  }
193  
194  static inline int unix_recvq_full(struct sock const *sk)
195  {
196  	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
197  }
198  
199  struct sock *unix_peer_get(struct sock *s)
200  {
201  	struct sock *peer;
202  
203  	unix_state_lock(s);
204  	peer = unix_peer(s);
205  	if (peer)
206  		sock_hold(peer);
207  	unix_state_unlock(s);
208  	return peer;
209  }
210  EXPORT_SYMBOL_GPL(unix_peer_get);
211  
212  static inline void unix_release_addr(struct unix_address *addr)
213  {
214  	if (atomic_dec_and_test(&addr->refcnt))
215  		kfree(addr);
216  }
217  
218  /*
219   *	Check unix socket name:
220   *		- it should not be zero length.
221   *		- if it does not start with a zero byte, it should be NUL terminated (FS object)
222   *		- if it starts with a zero byte, it is an abstract name.
223   */
224  
225  static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
226  {
227  	if (len <= sizeof(short) || len > sizeof(*sunaddr))
228  		return -EINVAL;
229  	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230  		return -EINVAL;
231  	if (sunaddr->sun_path[0]) {
232  		/*
233  		 * This may look like an off by one error but it is a bit more
234  		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235  		 * sun_path[108] doesn't as such exist.  However in kernel space
236  		 * we are guaranteed that it is a valid memory location in our
237  		 * kernel address buffer.
238  		 */
239  		((char *)sunaddr)[len] = 0;
240  		len = strlen(sunaddr->sun_path)+1+sizeof(short);
241  		return len;
242  	}
243  
244  	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
245  	return len;
246  }
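/*
 * Editorial note: for a filesystem name such as sun_path = "/tmp/x",
 * unix_mkname() zero-terminates the copy and returns
 * strlen("/tmp/x") + 1 + sizeof(short); for an abstract name (leading
 * zero byte) it returns len unchanged and stores a fold of
 * csum_partial() over the name bytes in *hashp.
 */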
247  
248  static void __unix_remove_socket(struct sock *sk)
249  {
250  	sk_del_node_init(sk);
251  }
252  
253  static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254  {
255  	WARN_ON(!sk_unhashed(sk));
256  	sk_add_node(sk, list);
257  }
258  
259  static inline void unix_remove_socket(struct sock *sk)
260  {
261  	spin_lock(&unix_table_lock);
262  	__unix_remove_socket(sk);
263  	spin_unlock(&unix_table_lock);
264  }
265  
266  static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267  {
268  	spin_lock(&unix_table_lock);
269  	__unix_insert_socket(list, sk);
270  	spin_unlock(&unix_table_lock);
271  }
272  
273  static struct sock *__unix_find_socket_byname(struct net *net,
274  					      struct sockaddr_un *sunname,
275  					      int len, int type, unsigned int hash)
276  {
277  	struct sock *s;
278  
279  	sk_for_each(s, &unix_socket_table[hash ^ type]) {
280  		struct unix_sock *u = unix_sk(s);
281  
282  		if (!net_eq(sock_net(s), net))
283  			continue;
284  
285  		if (u->addr->len == len &&
286  		    !memcmp(u->addr->name, sunname, len))
287  			goto found;
288  	}
289  	s = NULL;
290  found:
291  	return s;
292  }
293  
294  static inline struct sock *unix_find_socket_byname(struct net *net,
295  						   struct sockaddr_un *sunname,
296  						   int len, int type,
297  						   unsigned int hash)
298  {
299  	struct sock *s;
300  
301  	spin_lock(&unix_table_lock);
302  	s = __unix_find_socket_byname(net, sunname, len, type, hash);
303  	if (s)
304  		sock_hold(s);
305  	spin_unlock(&unix_table_lock);
306  	return s;
307  }
308  
309  static struct sock *unix_find_socket_byinode(struct inode *i)
310  {
311  	struct sock *s;
312  
313  	spin_lock(&unix_table_lock);
314  	sk_for_each(s,
315  		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316  		struct dentry *dentry = unix_sk(s)->path.dentry;
317  
318  		if (dentry && d_backing_inode(dentry) == i) {
319  			sock_hold(s);
320  			goto found;
321  		}
322  	}
323  	s = NULL;
324  found:
325  	spin_unlock(&unix_table_lock);
326  	return s;
327  }
328  
329  /* Support code for asymmetrically connected dgram sockets
330   *
331   * If a datagram socket is connected to a socket not itself connected
332   * to the first socket (e.g., /dev/log), clients may only enqueue more
333   * messages if the present receive queue of the server socket is not
334   * "too large". This means there's a second writeability condition
335   * poll and sendmsg need to test. The dgram recv code will do a wake
336   * up on the peer_wait wait queue of a socket upon reception of a
337   * datagram which needs to be propagated to sleeping would-be writers
338   * since these might not have sent anything so far. This can't be
339   * accomplished via poll_wait because the lifetime of the server
340   * socket might be less than that of its clients if these break their
341   * association with it or if the server socket is closed while clients
342   * are still connected to it, and there's no way to inform "a polling
343   * implementation" that it should let go of a certain wait queue.
344   *
345   * In order to propagate a wake up, a wait_queue_t of the client
346   * socket is enqueued on the peer_wait queue of the server socket
347   * whose wake function does a wake_up on the ordinary client socket
348   * wait queue. This connection is established whenever a write (or
349   * poll for write) hits the flow control condition and is broken when
350   * the association to the server socket is dissolved or after a wake up
351   * was relayed.
352   */
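/*
 * Editorial sketch (not part of the original file): the userspace
 * pattern this machinery serves. A nonblocking dgram client connected
 * to a busy server (e.g. a /dev/log style socket) parks in poll()
 * until the server's receive queue drains; the relay below is what
 * turns a server-side read into a client-side POLLOUT wakeup.
 *
 *	fcntl(fd, F_SETFL, O_NONBLOCK);
 *	connect(fd, (struct sockaddr *)&srv, srvlen);
 *	while (send(fd, buf, n, 0) < 0 && errno == EAGAIN) {
 *		struct pollfd p = { .fd = fd, .events = POLLOUT };
 *
 *		poll(&p, 1, -1);  // woken via unix_dgram_peer_wake_relay()
 *	}
 */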
353  
354  static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
355  				      void *key)
356  {
357  	struct unix_sock *u;
358  	wait_queue_head_t *u_sleep;
359  
360  	u = container_of(q, struct unix_sock, peer_wake);
361  
362  	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363  			    q);
364  	u->peer_wake.private = NULL;
365  
366  	/* relaying can only happen while the wq still exists */
367  	u_sleep = sk_sleep(&u->sk);
368  	if (u_sleep)
369  		wake_up_interruptible_poll(u_sleep, key);
370  
371  	return 0;
372  }
373  
374  static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375  {
376  	struct unix_sock *u, *u_other;
377  	int rc;
378  
379  	u = unix_sk(sk);
380  	u_other = unix_sk(other);
381  	rc = 0;
382  	spin_lock(&u_other->peer_wait.lock);
383  
384  	if (!u->peer_wake.private) {
385  		u->peer_wake.private = other;
386  		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387  
388  		rc = 1;
389  	}
390  
391  	spin_unlock(&u_other->peer_wait.lock);
392  	return rc;
393  }
394  
395  static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396  					    struct sock *other)
397  {
398  	struct unix_sock *u, *u_other;
399  
400  	u = unix_sk(sk);
401  	u_other = unix_sk(other);
402  	spin_lock(&u_other->peer_wait.lock);
403  
404  	if (u->peer_wake.private == other) {
405  		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406  		u->peer_wake.private = NULL;
407  	}
408  
409  	spin_unlock(&u_other->peer_wait.lock);
410  }
411  
412  static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413  						   struct sock *other)
414  {
415  	unix_dgram_peer_wake_disconnect(sk, other);
416  	wake_up_interruptible_poll(sk_sleep(sk),
417  				   POLLOUT |
418  				   POLLWRNORM |
419  				   POLLWRBAND);
420  }
421  
422  /* preconditions:
423   *	- unix_peer(sk) == other
424   *	- association is stable
425   */
426  static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427  {
428  	int connected;
429  
430  	connected = unix_dgram_peer_wake_connect(sk, other);
431  
432  	if (unix_recvq_full(other))
433  		return 1;
434  
435  	if (connected)
436  		unix_dgram_peer_wake_disconnect(sk, other);
437  
438  	return 0;
439  }
440  
441  static int unix_writable(const struct sock *sk)
442  {
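	/* Editorial note: writable while pending write memory is at most a
	 * quarter of sk_sndbuf.
	 */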
443  	return sk->sk_state != TCP_LISTEN &&
444  	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
445  }
446  
447  static void unix_write_space(struct sock *sk)
448  {
449  	struct socket_wq *wq;
450  
451  	rcu_read_lock();
452  	if (unix_writable(sk)) {
453  		wq = rcu_dereference(sk->sk_wq);
454  		if (skwq_has_sleeper(wq))
455  			wake_up_interruptible_sync_poll(&wq->wait,
456  				POLLOUT | POLLWRNORM | POLLWRBAND);
457  		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
458  	}
459  	rcu_read_unlock();
460  }
461  
462  /* When a dgram socket disconnects (or changes its peer), we clear its
463   * receive queue of packets that arrived from the previous peer. First, this
464   * allows flow control based only on wmem_alloc; second, an sk connected to a
465   * peer may receive messages only from that peer. */
466  static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
467  {
468  	if (!skb_queue_empty(&sk->sk_receive_queue)) {
469  		skb_queue_purge(&sk->sk_receive_queue);
470  		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
471  
472  		/* If one link of a bidirectional dgram pipe is disconnected,
473  		 * we signal an error. Messages are lost. Do not do this
474  		 * when the peer was not connected to us.
475  		 */
476  		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
477  			other->sk_err = ECONNRESET;
478  			other->sk_error_report(other);
479  		}
480  	}
481  }
482  
483  static void unix_sock_destructor(struct sock *sk)
484  {
485  	struct unix_sock *u = unix_sk(sk);
486  
487  	skb_queue_purge(&sk->sk_receive_queue);
488  
489  	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
490  	WARN_ON(!sk_unhashed(sk));
491  	WARN_ON(sk->sk_socket);
492  	if (!sock_flag(sk, SOCK_DEAD)) {
493  		pr_info("Attempt to release alive unix socket: %p\n", sk);
494  		return;
495  	}
496  
497  	if (u->addr)
498  		unix_release_addr(u->addr);
499  
500  	atomic_long_dec(&unix_nr_socks);
501  	local_bh_disable();
502  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
503  	local_bh_enable();
504  #ifdef UNIX_REFCNT_DEBUG
505  	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
506  		atomic_long_read(&unix_nr_socks));
507  #endif
508  }
509  
510  static void unix_release_sock(struct sock *sk, int embrion)
511  {
512  	struct unix_sock *u = unix_sk(sk);
513  	struct path path;
514  	struct sock *skpair;
515  	struct sk_buff *skb;
516  	int state;
517  
518  	unix_remove_socket(sk);
519  
520  	/* Clear state */
521  	unix_state_lock(sk);
522  	sock_orphan(sk);
523  	sk->sk_shutdown = SHUTDOWN_MASK;
524  	path	     = u->path;
525  	u->path.dentry = NULL;
526  	u->path.mnt = NULL;
527  	state = sk->sk_state;
528  	sk->sk_state = TCP_CLOSE;
529  	unix_state_unlock(sk);
530  
531  	wake_up_interruptible_all(&u->peer_wait);
532  
533  	skpair = unix_peer(sk);
534  
535  	if (skpair != NULL) {
536  		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
537  			unix_state_lock(skpair);
538  			/* No more writes */
539  			skpair->sk_shutdown = SHUTDOWN_MASK;
540  			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
541  				skpair->sk_err = ECONNRESET;
542  			unix_state_unlock(skpair);
543  			skpair->sk_state_change(skpair);
544  			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
545  		}
546  
547  		unix_dgram_peer_wake_disconnect(sk, skpair);
548  		sock_put(skpair); /* It may now die */
549  		unix_peer(sk) = NULL;
550  	}
551  
552  	/* Try to flush out this socket. Throw out buffers at least */
553  
554  	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
555  		if (state == TCP_LISTEN)
556  			unix_release_sock(skb->sk, 1);
557  		/* passed fds are erased in the kfree_skb hook	      */
558  		UNIXCB(skb).consumed = skb->len;
559  		kfree_skb(skb);
560  	}
561  
562  	if (path.dentry)
563  		path_put(&path);
564  
565  	sock_put(sk);
566  
567  	/* ---- Socket is dead now and most probably destroyed ---- */
568  
569  	/*
570  	 * Fixme: BSD difference: In BSD all sockets connected to us get
571  	 *	  ECONNRESET and we die on the spot. In Linux we behave
572  	 *	  like files and pipes do and wait for the last
573  	 *	  dereference.
574  	 *
575  	 * Can't we simply set sock->err?
576  	 *
577  	 *	  What does the above comment talk about? --ANK(980817)
578  	 */
579  
580  	if (unix_tot_inflight)
581  		unix_gc();		/* Garbage collect fds */
582  }
583  
584  static void init_peercred(struct sock *sk)
585  {
586  	put_pid(sk->sk_peer_pid);
587  	if (sk->sk_peer_cred)
588  		put_cred(sk->sk_peer_cred);
589  	sk->sk_peer_pid  = get_pid(task_tgid(current));
590  	sk->sk_peer_cred = get_current_cred();
591  }
592  
593  static void copy_peercred(struct sock *sk, struct sock *peersk)
594  {
595  	put_pid(sk->sk_peer_pid);
596  	if (sk->sk_peer_cred)
597  		put_cred(sk->sk_peer_cred);
598  	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
599  	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
600  }
601  
602  static int unix_listen(struct socket *sock, int backlog)
603  {
604  	int err;
605  	struct sock *sk = sock->sk;
606  	struct unix_sock *u = unix_sk(sk);
607  	struct pid *old_pid = NULL;
608  
609  	err = -EOPNOTSUPP;
610  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
611  		goto out;	/* Only stream/seqpacket sockets accept */
612  	err = -EINVAL;
613  	if (!u->addr)
614  		goto out;	/* No listens on an unbound socket */
615  	unix_state_lock(sk);
616  	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
617  		goto out_unlock;
618  	if (backlog > sk->sk_max_ack_backlog)
619  		wake_up_interruptible_all(&u->peer_wait);
620  	sk->sk_max_ack_backlog	= backlog;
621  	sk->sk_state		= TCP_LISTEN;
622  	/* set credentials so connect can copy them */
623  	init_peercred(sk);
624  	err = 0;
625  
626  out_unlock:
627  	unix_state_unlock(sk);
628  	put_pid(old_pid);
629  out:
630  	return err;
631  }
632  
633  static int unix_release(struct socket *);
634  static int unix_bind(struct socket *, struct sockaddr *, int);
635  static int unix_stream_connect(struct socket *, struct sockaddr *,
636  			       int addr_len, int flags);
637  static int unix_socketpair(struct socket *, struct socket *);
638  static int unix_accept(struct socket *, struct socket *, int);
639  static int unix_getname(struct socket *, struct sockaddr *, int *, int);
640  static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
641  static unsigned int unix_dgram_poll(struct file *, struct socket *,
642  				    poll_table *);
643  static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644  static int unix_shutdown(struct socket *, int);
645  static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
646  static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
647  static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
648  				    size_t size, int flags);
649  static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
650  				       struct pipe_inode_info *, size_t size,
651  				       unsigned int flags);
652  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
653  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
654  static int unix_dgram_connect(struct socket *, struct sockaddr *,
655  			      int, int);
656  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
657  static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
658  				  int);
659  
660  static int unix_set_peek_off(struct sock *sk, int val)
661  {
662  	struct unix_sock *u = unix_sk(sk);
663  
664  	if (mutex_lock_interruptible(&u->readlock))
665  		return -EINTR;
666  
667  	sk->sk_peek_off = val;
668  	mutex_unlock(&u->readlock);
669  
670  	return 0;
671  }
672  
673  
674  static const struct proto_ops unix_stream_ops = {
675  	.family =	PF_UNIX,
676  	.owner =	THIS_MODULE,
677  	.release =	unix_release,
678  	.bind =		unix_bind,
679  	.connect =	unix_stream_connect,
680  	.socketpair =	unix_socketpair,
681  	.accept =	unix_accept,
682  	.getname =	unix_getname,
683  	.poll =		unix_poll,
684  	.ioctl =	unix_ioctl,
685  	.listen =	unix_listen,
686  	.shutdown =	unix_shutdown,
687  	.setsockopt =	sock_no_setsockopt,
688  	.getsockopt =	sock_no_getsockopt,
689  	.sendmsg =	unix_stream_sendmsg,
690  	.recvmsg =	unix_stream_recvmsg,
691  	.mmap =		sock_no_mmap,
692  	.sendpage =	unix_stream_sendpage,
693  	.splice_read =	unix_stream_splice_read,
694  	.set_peek_off =	unix_set_peek_off,
695  };
696  
697  static const struct proto_ops unix_dgram_ops = {
698  	.family =	PF_UNIX,
699  	.owner =	THIS_MODULE,
700  	.release =	unix_release,
701  	.bind =		unix_bind,
702  	.connect =	unix_dgram_connect,
703  	.socketpair =	unix_socketpair,
704  	.accept =	sock_no_accept,
705  	.getname =	unix_getname,
706  	.poll =		unix_dgram_poll,
707  	.ioctl =	unix_ioctl,
708  	.listen =	sock_no_listen,
709  	.shutdown =	unix_shutdown,
710  	.setsockopt =	sock_no_setsockopt,
711  	.getsockopt =	sock_no_getsockopt,
712  	.sendmsg =	unix_dgram_sendmsg,
713  	.recvmsg =	unix_dgram_recvmsg,
714  	.mmap =		sock_no_mmap,
715  	.sendpage =	sock_no_sendpage,
716  	.set_peek_off =	unix_set_peek_off,
717  };
718  
719  static const struct proto_ops unix_seqpacket_ops = {
720  	.family =	PF_UNIX,
721  	.owner =	THIS_MODULE,
722  	.release =	unix_release,
723  	.bind =		unix_bind,
724  	.connect =	unix_stream_connect,
725  	.socketpair =	unix_socketpair,
726  	.accept =	unix_accept,
727  	.getname =	unix_getname,
728  	.poll =		unix_dgram_poll,
729  	.ioctl =	unix_ioctl,
730  	.listen =	unix_listen,
731  	.shutdown =	unix_shutdown,
732  	.setsockopt =	sock_no_setsockopt,
733  	.getsockopt =	sock_no_getsockopt,
734  	.sendmsg =	unix_seqpacket_sendmsg,
735  	.recvmsg =	unix_seqpacket_recvmsg,
736  	.mmap =		sock_no_mmap,
737  	.sendpage =	sock_no_sendpage,
738  	.set_peek_off =	unix_set_peek_off,
739  };
740  
741  static struct proto unix_proto = {
742  	.name			= "UNIX",
743  	.owner			= THIS_MODULE,
744  	.obj_size		= sizeof(struct unix_sock),
745  };
746  
747  /*
748   * AF_UNIX sockets do not interact with hardware, hence they
749   * dont trigger interrupts - so it's safe for them to have
750   * bh-unsafe locking for their sk_receive_queue.lock. Split off
751   * this special lock-class by reinitializing the spinlock key:
752   */
753  static struct lock_class_key af_unix_sk_receive_queue_lock_key;
754  
755  static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
756  {
757  	struct sock *sk = NULL;
758  	struct unix_sock *u;
759  
760  	atomic_long_inc(&unix_nr_socks);
761  	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
762  		goto out;
763  
764  	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
765  	if (!sk)
766  		goto out;
767  
768  	sock_init_data(sock, sk);
769  	lockdep_set_class(&sk->sk_receive_queue.lock,
770  				&af_unix_sk_receive_queue_lock_key);
771  
772  	sk->sk_write_space	= unix_write_space;
773  	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
774  	sk->sk_destruct		= unix_sock_destructor;
775  	u	  = unix_sk(sk);
776  	u->path.dentry = NULL;
777  	u->path.mnt = NULL;
778  	spin_lock_init(&u->lock);
779  	atomic_long_set(&u->inflight, 0);
780  	INIT_LIST_HEAD(&u->link);
781  	mutex_init(&u->readlock); /* single task reading lock */
782  	init_waitqueue_head(&u->peer_wait);
783  	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
784  	unix_insert_socket(unix_sockets_unbound(sk), sk);
785  out:
786  	if (sk == NULL)
787  		atomic_long_dec(&unix_nr_socks);
788  	else {
789  		local_bh_disable();
790  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
791  		local_bh_enable();
792  	}
793  	return sk;
794  }
795  
796  static int unix_create(struct net *net, struct socket *sock, int protocol,
797  		       int kern)
798  {
799  	if (protocol && protocol != PF_UNIX)
800  		return -EPROTONOSUPPORT;
801  
802  	sock->state = SS_UNCONNECTED;
803  
804  	switch (sock->type) {
805  	case SOCK_STREAM:
806  		sock->ops = &unix_stream_ops;
807  		break;
808  		/*
809  		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
810  		 *	nothing uses it.
811  		 */
812  	case SOCK_RAW:
813  		sock->type = SOCK_DGRAM;
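		/* fall through: SOCK_RAW is handled as SOCK_DGRAM */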
814  	case SOCK_DGRAM:
815  		sock->ops = &unix_dgram_ops;
816  		break;
817  	case SOCK_SEQPACKET:
818  		sock->ops = &unix_seqpacket_ops;
819  		break;
820  	default:
821  		return -ESOCKTNOSUPPORT;
822  	}
823  
824  	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
825  }
826  
827  static int unix_release(struct socket *sock)
828  {
829  	struct sock *sk = sock->sk;
830  
831  	if (!sk)
832  		return 0;
833  
834  	unix_release_sock(sk, 0);
835  	sock->sk = NULL;
836  
837  	return 0;
838  }
839  
840  static int unix_autobind(struct socket *sock)
841  {
842  	struct sock *sk = sock->sk;
843  	struct net *net = sock_net(sk);
844  	struct unix_sock *u = unix_sk(sk);
845  	static u32 ordernum = 1;
846  	struct unix_address *addr;
847  	int err;
848  	unsigned int retries = 0;
849  
850  	err = mutex_lock_interruptible(&u->readlock);
851  	if (err)
852  		return err;
853  
854  	err = 0;
855  	if (u->addr)
856  		goto out;
857  
858  	err = -ENOMEM;
859  	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
860  	if (!addr)
861  		goto out;
862  
863  	addr->name->sun_family = AF_UNIX;
864  	atomic_set(&addr->refcnt, 1);
865  
866  retry:
867  	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
868  	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
869  
870  	spin_lock(&unix_table_lock);
871  	ordernum = (ordernum+1)&0xFFFFF;
872  
873  	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
874  				      addr->hash)) {
875  		spin_unlock(&unix_table_lock);
876  		/*
877  		 * __unix_find_socket_byname() may take a long time if many names
878  		 * are already in use.
879  		 */
880  		cond_resched();
881  		/* Give up if all names seem to be in use. */
882  		if (retries++ == 0xFFFFF) {
883  			err = -ENOSPC;
884  			kfree(addr);
885  			goto out;
886  		}
887  		goto retry;
888  	}
889  	addr->hash ^= sk->sk_type;
890  
891  	__unix_remove_socket(sk);
892  	u->addr = addr;
893  	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
894  	spin_unlock(&unix_table_lock);
895  	err = 0;
896  
897  out:	mutex_unlock(&u->readlock);
898  	return err;
899  }
900  
901  static struct sock *unix_find_other(struct net *net,
902  				    struct sockaddr_un *sunname, int len,
903  				    int type, unsigned int hash, int *error)
904  {
905  	struct sock *u;
906  	struct path path;
907  	int err = 0;
908  
909  	if (sunname->sun_path[0]) {
910  		struct inode *inode;
911  		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
912  		if (err)
913  			goto fail;
914  		inode = d_backing_inode(path.dentry);
915  		err = inode_permission(inode, MAY_WRITE);
916  		if (err)
917  			goto put_fail;
918  
919  		err = -ECONNREFUSED;
920  		if (!S_ISSOCK(inode->i_mode))
921  			goto put_fail;
922  		u = unix_find_socket_byinode(inode);
923  		if (!u)
924  			goto put_fail;
925  
926  		if (u->sk_type == type)
927  			touch_atime(&path);
928  
929  		path_put(&path);
930  
931  		err = -EPROTOTYPE;
932  		if (u->sk_type != type) {
933  			sock_put(u);
934  			goto fail;
935  		}
936  	} else {
937  		err = -ECONNREFUSED;
938  		u = unix_find_socket_byname(net, sunname, len, type, hash);
939  		if (u) {
940  			struct dentry *dentry;
941  			dentry = unix_sk(u)->path.dentry;
942  			if (dentry)
943  				touch_atime(&unix_sk(u)->path);
944  		} else
945  			goto fail;
946  	}
947  	return u;
948  
949  put_fail:
950  	path_put(&path);
951  fail:
952  	*error = err;
953  	return NULL;
954  }
955  
956  static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
957  		      struct path *res)
958  {
959  	int err;
960  
961  	err = security_path_mknod(path, dentry, mode, 0);
962  	if (!err) {
963  		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
964  		if (!err) {
965  			res->mnt = mntget(path->mnt);
966  			res->dentry = dget(dentry);
967  		}
968  	}
969  
970  	return err;
971  }
972  
973  static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
974  {
975  	struct sock *sk = sock->sk;
976  	struct net *net = sock_net(sk);
977  	struct unix_sock *u = unix_sk(sk);
978  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
979  	char *sun_path = sunaddr->sun_path;
980  	int err, name_err;
981  	unsigned int hash;
982  	struct unix_address *addr;
983  	struct hlist_head *list;
984  	struct path path;
985  	struct dentry *dentry;
986  
987  	err = -EINVAL;
988  	if (sunaddr->sun_family != AF_UNIX)
989  		goto out;
990  
991  	if (addr_len == sizeof(short)) {
992  		err = unix_autobind(sock);
993  		goto out;
994  	}
995  
996  	err = unix_mkname(sunaddr, addr_len, &hash);
997  	if (err < 0)
998  		goto out;
999  	addr_len = err;
1000  
1001  	name_err = 0;
1002  	dentry = NULL;
1003  	if (sun_path[0]) {
1004  		/* Get the parent directory, calculate the hash for last
1005  		 * component.
1006  		 */
1007  		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
1008  
1009  		if (IS_ERR(dentry)) {
1010  			/* delay report until after 'already bound' check */
1011  			name_err = PTR_ERR(dentry);
1012  			dentry = NULL;
1013  		}
1014  	}
1015  
1016  	err = mutex_lock_interruptible(&u->readlock);
1017  	if (err)
1018  		goto out_path;
1019  
1020  	err = -EINVAL;
1021  	if (u->addr)
1022  		goto out_up;
1023  
1024  	if (name_err) {
1025  		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
1026  		goto out_up;
1027  	}
1028  
1029  	err = -ENOMEM;
1030  	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1031  	if (!addr)
1032  		goto out_up;
1033  
1034  	memcpy(addr->name, sunaddr, addr_len);
1035  	addr->len = addr_len;
1036  	addr->hash = hash ^ sk->sk_type;
1037  	atomic_set(&addr->refcnt, 1);
1038  
1039  	if (dentry) {
1040  		struct path u_path;
1041  		umode_t mode = S_IFSOCK |
1042  		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1043  		err = unix_mknod(dentry, &path, mode, &u_path);
1044  		if (err) {
1045  			if (err == -EEXIST)
1046  				err = -EADDRINUSE;
1047  			unix_release_addr(addr);
1048  			goto out_up;
1049  		}
1050  		addr->hash = UNIX_HASH_SIZE;
1051  		hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1052  		spin_lock(&unix_table_lock);
1053  		u->path = u_path;
1054  		list = &unix_socket_table[hash];
1055  	} else {
1056  		spin_lock(&unix_table_lock);
1057  		err = -EADDRINUSE;
1058  		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1059  					      sk->sk_type, hash)) {
1060  			unix_release_addr(addr);
1061  			goto out_unlock;
1062  		}
1063  
1064  		list = &unix_socket_table[addr->hash];
1065  	}
1066  
1067  	err = 0;
1068  	__unix_remove_socket(sk);
1069  	u->addr = addr;
1070  	__unix_insert_socket(list, sk);
1071  
1072  out_unlock:
1073  	spin_unlock(&unix_table_lock);
1074  out_up:
1075  	mutex_unlock(&u->readlock);
1076  out_path:
1077  	if (dentry)
1078  		done_path_create(&path, dentry);
1079  
1080  out:
1081  	return err;
1082  }
1083  
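/* Editorial note: the helpers below lock two sockets in pointer order
 * (lowest address first) so that two tasks double-locking the same pair
 * concurrently cannot deadlock against each other.
 */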
1084  static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1085  {
1086  	if (unlikely(sk1 == sk2) || !sk2) {
1087  		unix_state_lock(sk1);
1088  		return;
1089  	}
1090  	if (sk1 < sk2) {
1091  		unix_state_lock(sk1);
1092  		unix_state_lock_nested(sk2);
1093  	} else {
1094  		unix_state_lock(sk2);
1095  		unix_state_lock_nested(sk1);
1096  	}
1097  }
1098  
1099  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1100  {
1101  	if (unlikely(sk1 == sk2) || !sk2) {
1102  		unix_state_unlock(sk1);
1103  		return;
1104  	}
1105  	unix_state_unlock(sk1);
1106  	unix_state_unlock(sk2);
1107  }
1108  
1109  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1110  			      int alen, int flags)
1111  {
1112  	struct sock *sk = sock->sk;
1113  	struct net *net = sock_net(sk);
1114  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115  	struct sock *other;
1116  	unsigned int hash;
1117  	int err;
1118  
1119  	if (addr->sa_family != AF_UNSPEC) {
1120  		err = unix_mkname(sunaddr, alen, &hash);
1121  		if (err < 0)
1122  			goto out;
1123  		alen = err;
1124  
1125  		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1126  		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1127  			goto out;
1128  
1129  restart:
1130  		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1131  		if (!other)
1132  			goto out;
1133  
1134  		unix_state_double_lock(sk, other);
1135  
1136  		/* Apparently VFS overslept socket death. Retry. */
1137  		if (sock_flag(other, SOCK_DEAD)) {
1138  			unix_state_double_unlock(sk, other);
1139  			sock_put(other);
1140  			goto restart;
1141  		}
1142  
1143  		err = -EPERM;
1144  		if (!unix_may_send(sk, other))
1145  			goto out_unlock;
1146  
1147  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1148  		if (err)
1149  			goto out_unlock;
1150  
1151  	} else {
1152  		/*
1153  		 *	1003.1g breaking connected state with AF_UNSPEC
1154  		 */
1155  		other = NULL;
1156  		unix_state_double_lock(sk, other);
1157  	}
1158  
1159  	/*
1160  	 * If it was connected, reconnect.
1161  	 */
1162  	if (unix_peer(sk)) {
1163  		struct sock *old_peer = unix_peer(sk);
1164  		unix_peer(sk) = other;
1165  		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1166  
1167  		unix_state_double_unlock(sk, other);
1168  
1169  		if (other != old_peer)
1170  			unix_dgram_disconnected(sk, old_peer);
1171  		sock_put(old_peer);
1172  	} else {
1173  		unix_peer(sk) = other;
1174  		unix_state_double_unlock(sk, other);
1175  	}
1176  	return 0;
1177  
1178  out_unlock:
1179  	unix_state_double_unlock(sk, other);
1180  	sock_put(other);
1181  out:
1182  	return err;
1183  }
1184  
1185  static long unix_wait_for_peer(struct sock *other, long timeo)
1186  {
1187  	struct unix_sock *u = unix_sk(other);
1188  	int sched;
1189  	DEFINE_WAIT(wait);
1190  
1191  	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1192  
1193  	sched = !sock_flag(other, SOCK_DEAD) &&
1194  		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1195  		unix_recvq_full(other);
1196  
1197  	unix_state_unlock(other);
1198  
1199  	if (sched)
1200  		timeo = schedule_timeout(timeo);
1201  
1202  	finish_wait(&u->peer_wait, &wait);
1203  	return timeo;
1204  }
1205  
1206  static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1207  			       int addr_len, int flags)
1208  {
1209  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1210  	struct sock *sk = sock->sk;
1211  	struct net *net = sock_net(sk);
1212  	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1213  	struct sock *newsk = NULL;
1214  	struct sock *other = NULL;
1215  	struct sk_buff *skb = NULL;
1216  	unsigned int hash;
1217  	int st;
1218  	int err;
1219  	long timeo;
1220  
1221  	err = unix_mkname(sunaddr, addr_len, &hash);
1222  	if (err < 0)
1223  		goto out;
1224  	addr_len = err;
1225  
1226  	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1227  	    (err = unix_autobind(sock)) != 0)
1228  		goto out;
1229  
1230  	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1231  
1232  	/* First of all allocate resources.
1233  	   If we do it after the state is locked,
1234  	   we will have to recheck everything again in any case.
1235  	 */
1236  
1237  	err = -ENOMEM;
1238  
1239  	/* create new sock for complete connection */
1240  	newsk = unix_create1(sock_net(sk), NULL, 0);
1241  	if (newsk == NULL)
1242  		goto out;
1243  
1244  	/* Allocate skb for sending to listening sock */
1245  	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1246  	if (skb == NULL)
1247  		goto out;
1248  
1249  restart:
1250  	/*  Find listening sock. */
1251  	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1252  	if (!other)
1253  		goto out;
1254  
1255  	/* Latch state of peer */
1256  	unix_state_lock(other);
1257  
1258  	/* Apparently VFS overslept socket death. Retry. */
1259  	if (sock_flag(other, SOCK_DEAD)) {
1260  		unix_state_unlock(other);
1261  		sock_put(other);
1262  		goto restart;
1263  	}
1264  
1265  	err = -ECONNREFUSED;
1266  	if (other->sk_state != TCP_LISTEN)
1267  		goto out_unlock;
1268  	if (other->sk_shutdown & RCV_SHUTDOWN)
1269  		goto out_unlock;
1270  
1271  	if (unix_recvq_full(other)) {
1272  		err = -EAGAIN;
1273  		if (!timeo)
1274  			goto out_unlock;
1275  
1276  		timeo = unix_wait_for_peer(other, timeo);
1277  
1278  		err = sock_intr_errno(timeo);
1279  		if (signal_pending(current))
1280  			goto out;
1281  		sock_put(other);
1282  		goto restart;
1283  	}
1284  
1285  	/* Latch our state.
1286  
1287  	   This is a tricky place. We need to grab our state lock and cannot
1288  	   drop the lock on the peer. It is dangerous because deadlock is
1289  	   possible. The connect-to-self case and simultaneous
1290  	   attempts to connect are eliminated by checking socket
1291  	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1292  	   check this before attempting to grab the lock.
1293  
1294  	   Well, and we have to recheck the state after the socket is locked.
1295  	 */
1296  	st = sk->sk_state;
1297  
1298  	switch (st) {
1299  	case TCP_CLOSE:
1300  		/* This is ok... continue with connect */
1301  		break;
1302  	case TCP_ESTABLISHED:
1303  		/* Socket is already connected */
1304  		err = -EISCONN;
1305  		goto out_unlock;
1306  	default:
1307  		err = -EINVAL;
1308  		goto out_unlock;
1309  	}
1310  
1311  	unix_state_lock_nested(sk);
1312  
1313  	if (sk->sk_state != st) {
1314  		unix_state_unlock(sk);
1315  		unix_state_unlock(other);
1316  		sock_put(other);
1317  		goto restart;
1318  	}
1319  
1320  	err = security_unix_stream_connect(sk, other, newsk);
1321  	if (err) {
1322  		unix_state_unlock(sk);
1323  		goto out_unlock;
1324  	}
1325  
1326  	/* The way is open! Quickly set all the necessary fields... */
1327  
1328  	sock_hold(sk);
1329  	unix_peer(newsk)	= sk;
1330  	newsk->sk_state		= TCP_ESTABLISHED;
1331  	newsk->sk_type		= sk->sk_type;
1332  	init_peercred(newsk);
1333  	newu = unix_sk(newsk);
1334  	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1335  	otheru = unix_sk(other);
1336  
1337  	/* copy address information from listening to new sock */
1338  	if (otheru->addr) {
1339  		atomic_inc(&otheru->addr->refcnt);
1340  		newu->addr = otheru->addr;
1341  	}
1342  	if (otheru->path.dentry) {
1343  		path_get(&otheru->path);
1344  		newu->path = otheru->path;
1345  	}
1346  
1347  	/* Set credentials */
1348  	copy_peercred(sk, other);
1349  
1350  	sock->state	= SS_CONNECTED;
1351  	sk->sk_state	= TCP_ESTABLISHED;
1352  	sock_hold(newsk);
1353  
1354  	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1355  	unix_peer(sk)	= newsk;
1356  
1357  	unix_state_unlock(sk);
1358  
1359  	/* take ten and send info to listening sock */
1360  	spin_lock(&other->sk_receive_queue.lock);
1361  	__skb_queue_tail(&other->sk_receive_queue, skb);
1362  	spin_unlock(&other->sk_receive_queue.lock);
1363  	unix_state_unlock(other);
1364  	other->sk_data_ready(other);
1365  	sock_put(other);
1366  	return 0;
1367  
1368  out_unlock:
1369  	if (other)
1370  		unix_state_unlock(other);
1371  
1372  out:
1373  	kfree_skb(skb);
1374  	if (newsk)
1375  		unix_release_sock(newsk, 0);
1376  	if (other)
1377  		sock_put(other);
1378  	return err;
1379  }
1380  
1381  static int unix_socketpair(struct socket *socka, struct socket *sockb)
1382  {
1383  	struct sock *ska = socka->sk, *skb = sockb->sk;
1384  
1385  	/* Join our sockets back to back */
1386  	sock_hold(ska);
1387  	sock_hold(skb);
1388  	unix_peer(ska) = skb;
1389  	unix_peer(skb) = ska;
1390  	init_peercred(ska);
1391  	init_peercred(skb);
1392  
1393  	if (ska->sk_type != SOCK_DGRAM) {
1394  		ska->sk_state = TCP_ESTABLISHED;
1395  		skb->sk_state = TCP_ESTABLISHED;
1396  		socka->state  = SS_CONNECTED;
1397  		sockb->state  = SS_CONNECTED;
1398  	}
1399  	return 0;
1400  }
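/*
 * Editorial sketch (not part of the original file): the userspace entry
 * point for the code above.
 *
 *	int fds[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) {
 *		// fds[0] and fds[1] are joined back to back; SO_PEERCRED
 *		// on either one reports the creator's pid/uid/gid.
 *	}
 */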
1401  
1402  static void unix_sock_inherit_flags(const struct socket *old,
1403  				    struct socket *new)
1404  {
1405  	if (test_bit(SOCK_PASSCRED, &old->flags))
1406  		set_bit(SOCK_PASSCRED, &new->flags);
1407  	if (test_bit(SOCK_PASSSEC, &old->flags))
1408  		set_bit(SOCK_PASSSEC, &new->flags);
1409  }
1410  
1411  static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1412  {
1413  	struct sock *sk = sock->sk;
1414  	struct sock *tsk;
1415  	struct sk_buff *skb;
1416  	int err;
1417  
1418  	err = -EOPNOTSUPP;
1419  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1420  		goto out;
1421  
1422  	err = -EINVAL;
1423  	if (sk->sk_state != TCP_LISTEN)
1424  		goto out;
1425  
1426  	/* If socket state is TCP_LISTEN it cannot change (for now...),
1427  	 * so no locks are necessary.
1428  	 */
1429  
1430  	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1431  	if (!skb) {
1432  		/* This means receive shutdown. */
1433  		if (err == 0)
1434  			err = -EINVAL;
1435  		goto out;
1436  	}
1437  
1438  	tsk = skb->sk;
1439  	skb_free_datagram(sk, skb);
1440  	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1441  
1442  	/* attach accepted sock to socket */
1443  	unix_state_lock(tsk);
1444  	newsock->state = SS_CONNECTED;
1445  	unix_sock_inherit_flags(sock, newsock);
1446  	sock_graft(tsk, newsock);
1447  	unix_state_unlock(tsk);
1448  	return 0;
1449  
1450  out:
1451  	return err;
1452  }
1453  
1454  
1455  static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1456  {
1457  	struct sock *sk = sock->sk;
1458  	struct unix_sock *u;
1459  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1460  	int err = 0;
1461  
1462  	if (peer) {
1463  		sk = unix_peer_get(sk);
1464  
1465  		err = -ENOTCONN;
1466  		if (!sk)
1467  			goto out;
1468  		err = 0;
1469  	} else {
1470  		sock_hold(sk);
1471  	}
1472  
1473  	u = unix_sk(sk);
1474  	unix_state_lock(sk);
1475  	if (!u->addr) {
1476  		sunaddr->sun_family = AF_UNIX;
1477  		sunaddr->sun_path[0] = 0;
1478  		*uaddr_len = sizeof(short);
1479  	} else {
1480  		struct unix_address *addr = u->addr;
1481  
1482  		*uaddr_len = addr->len;
1483  		memcpy(sunaddr, addr->name, *uaddr_len);
1484  	}
1485  	unix_state_unlock(sk);
1486  	sock_put(sk);
1487  out:
1488  	return err;
1489  }
1490  
1491  static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1492  {
1493  	int i;
1494  
1495  	scm->fp = UNIXCB(skb).fp;
1496  	UNIXCB(skb).fp = NULL;
1497  
1498  	for (i = scm->fp->count-1; i >= 0; i--)
1499  		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500  }
1501  
1502  static void unix_destruct_scm(struct sk_buff *skb)
1503  {
1504  	struct scm_cookie scm;
1505  	memset(&scm, 0, sizeof(scm));
1506  	scm.pid  = UNIXCB(skb).pid;
1507  	if (UNIXCB(skb).fp)
1508  		unix_detach_fds(&scm, skb);
1509  
1510  	/* Alas, it calls VFS */
1511  	/* So fscking what? fput() had been SMP-safe since the last Summer */
1512  	scm_destroy(&scm);
1513  	sock_wfree(skb);
1514  }
1515  
1516  /*
1517   * The "user->unix_inflight" variable is protected by the garbage
1518   * collection lock, and we just read it locklessly here. If you go
1519   * over the limit, there might be a tiny race in actually noticing
1520   * it across threads. Tough.
1521   */
1522  static inline bool too_many_unix_fds(struct task_struct *p)
1523  {
1524  	struct user_struct *user = current_user();
1525  
1526  	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527  		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528  	return false;
1529  }
1530  
1531  #define MAX_RECURSION_LEVEL 4
1532  
1533  static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534  {
1535  	int i;
1536  	unsigned char max_level = 0;
1537  
1538  	if (too_many_unix_fds(current))
1539  		return -ETOOMANYREFS;
1540  
1541  	for (i = scm->fp->count - 1; i >= 0; i--) {
1542  		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1543  
1544  		if (sk)
1545  			max_level = max(max_level,
1546  					unix_sk(sk)->recursion_level);
1547  	}
1548  	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1549  		return -ETOOMANYREFS;
1550  
1551  	/*
1552  	 * Need to duplicate file references for the sake of garbage
1553  	 * collection.  Otherwise a socket in the fps might become a
1554  	 * candidate for GC while the skb is not yet queued.
1555  	 */
1556  	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1557  	if (!UNIXCB(skb).fp)
1558  		return -ENOMEM;
1559  
1560  	for (i = scm->fp->count - 1; i >= 0; i--)
1561  		unix_inflight(scm->fp->user, scm->fp->fp[i]);
1562  	return max_level;
1563  }
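/*
 * Editorial sketch (not part of the original file): the sender side
 * that ends up in unix_attach_fds() above. One descriptor is passed in
 * an SCM_RIGHTS control message; the kernel duplicates the file
 * reference and accounts it as in-flight until delivery.
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr mh = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *
 *	c->cmsg_level = SOL_SOCKET;
 *	c->cmsg_type  = SCM_RIGHTS;
 *	c->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &mh, 0);
 */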
1564  
1565  static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1566  {
1567  	int err = 0;
1568  
1569  	UNIXCB(skb).pid  = get_pid(scm->pid);
1570  	UNIXCB(skb).uid = scm->creds.uid;
1571  	UNIXCB(skb).gid = scm->creds.gid;
1572  	UNIXCB(skb).fp = NULL;
1573  	unix_get_secdata(scm, skb);
1574  	if (scm->fp && send_fds)
1575  		err = unix_attach_fds(scm, skb);
1576  
1577  	skb->destructor = unix_destruct_scm;
1578  	return err;
1579  }
1580  
1581  static bool unix_passcred_enabled(const struct socket *sock,
1582  				  const struct sock *other)
1583  {
1584  	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1585  	       !other->sk_socket ||
1586  	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1587  }
1588  
1589  /*
1590   * Some apps rely on write() giving SCM_CREDENTIALS
1591   * We include credentials if source or destination socket
1592   * asserted SOCK_PASSCRED.
1593   */
1594  static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1595  			    const struct sock *other)
1596  {
1597  	if (UNIXCB(skb).pid)
1598  		return;
1599  	if (unix_passcred_enabled(sock, other)) {
1600  		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1601  		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1602  	}
1603  }
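/*
 * Editorial sketch (not part of the original file): the receiver these
 * credentials are attached for. With SO_PASSCRED set, every message
 * carries an SCM_CREDENTIALS cmsg even if the sender used plain write().
 *
 *	char cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *	char data[128];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	struct cmsghdr *c;
 *	int on = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(fd, &mh, 0);
 *	for (c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c))
 *		if (c->cmsg_level == SOL_SOCKET &&
 *		    c->cmsg_type == SCM_CREDENTIALS) {
 *			struct ucred *uc = (struct ucred *)CMSG_DATA(c);
 *			// uc->pid, uc->uid, uc->gid identify the sender
 *		}
 */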
1604  
1605  static int maybe_init_creds(struct scm_cookie *scm,
1606  			    struct socket *socket,
1607  			    const struct sock *other)
1608  {
1609  	int err;
1610  	struct msghdr msg = { .msg_controllen = 0 };
1611  
1612  	err = scm_send(socket, &msg, scm, false);
1613  	if (err)
1614  		return err;
1615  
1616  	if (unix_passcred_enabled(socket, other)) {
1617  		scm->pid = get_pid(task_tgid(current));
1618  		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1619  	}
1620  	return err;
1621  }
1622  
1623  static bool unix_skb_scm_eq(struct sk_buff *skb,
1624  			    struct scm_cookie *scm)
1625  {
1626  	const struct unix_skb_parms *u = &UNIXCB(skb);
1627  
1628  	return u->pid == scm->pid &&
1629  	       uid_eq(u->uid, scm->creds.uid) &&
1630  	       gid_eq(u->gid, scm->creds.gid) &&
1631  	       unix_secdata_eq(scm, skb);
1632  }
1633  
1634  /*
1635   *	Send AF_UNIX data.
1636   */
1637  
1638  static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1639  			      size_t len)
1640  {
1641  	struct sock *sk = sock->sk;
1642  	struct net *net = sock_net(sk);
1643  	struct unix_sock *u = unix_sk(sk);
1644  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1645  	struct sock *other = NULL;
1646  	int namelen = 0; /* fake GCC */
1647  	int err;
1648  	unsigned int hash;
1649  	struct sk_buff *skb;
1650  	long timeo;
1651  	struct scm_cookie scm;
1652  	int max_level;
1653  	int data_len = 0;
1654  	int sk_locked;
1655  
1656  	wait_for_unix_gc();
1657  	err = scm_send(sock, msg, &scm, false);
1658  	if (err < 0)
1659  		return err;
1660  
1661  	err = -EOPNOTSUPP;
1662  	if (msg->msg_flags&MSG_OOB)
1663  		goto out;
1664  
1665  	if (msg->msg_namelen) {
1666  		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1667  		if (err < 0)
1668  			goto out;
1669  		namelen = err;
1670  	} else {
1671  		sunaddr = NULL;
1672  		err = -ENOTCONN;
1673  		other = unix_peer_get(sk);
1674  		if (!other)
1675  			goto out;
1676  	}
1677  
1678  	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1679  	    && (err = unix_autobind(sock)) != 0)
1680  		goto out;
1681  
1682  	err = -EMSGSIZE;
1683  	if (len > sk->sk_sndbuf - 32)
1684  		goto out;
1685  
1686  	if (len > SKB_MAX_ALLOC) {
1687  		data_len = min_t(size_t,
1688  				 len - SKB_MAX_ALLOC,
1689  				 MAX_SKB_FRAGS * PAGE_SIZE);
1690  		data_len = PAGE_ALIGN(data_len);
1691  
1692  		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1693  	}
1694  
1695  	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1696  				   msg->msg_flags & MSG_DONTWAIT, &err,
1697  				   PAGE_ALLOC_COSTLY_ORDER);
1698  	if (skb == NULL)
1699  		goto out;
1700  
1701  	err = unix_scm_to_skb(&scm, skb, true);
1702  	if (err < 0)
1703  		goto out_free;
1704  	max_level = err + 1;
1705  
1706  	skb_put(skb, len - data_len);
1707  	skb->data_len = data_len;
1708  	skb->len = len;
1709  	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1710  	if (err)
1711  		goto out_free;
1712  
1713  	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1714  
1715  restart:
1716  	if (!other) {
1717  		err = -ECONNRESET;
1718  		if (sunaddr == NULL)
1719  			goto out_free;
1720  
1721  		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1722  					hash, &err);
1723  		if (other == NULL)
1724  			goto out_free;
1725  	}
1726  
1727  	if (sk_filter(other, skb) < 0) {
1728  		/* Toss the packet but do not return any error to the sender */
1729  		err = len;
1730  		goto out_free;
1731  	}
1732  
1733  	sk_locked = 0;
1734  	unix_state_lock(other);
1735  restart_locked:
1736  	err = -EPERM;
1737  	if (!unix_may_send(sk, other))
1738  		goto out_unlock;
1739  
1740  	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1741  		/*
1742  		 *	Check with 1003.1g - what should
1743  		 *	datagram error
1744  		 */
1745  		unix_state_unlock(other);
1746  		sock_put(other);
1747  
1748  		if (!sk_locked)
1749  			unix_state_lock(sk);
1750  
1751  		err = 0;
1752  		if (unix_peer(sk) == other) {
1753  			unix_peer(sk) = NULL;
1754  			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1755  
1756  			unix_state_unlock(sk);
1757  
1758  			unix_dgram_disconnected(sk, other);
1759  			sock_put(other);
1760  			err = -ECONNREFUSED;
1761  		} else {
1762  			unix_state_unlock(sk);
1763  		}
1764  
1765  		other = NULL;
1766  		if (err)
1767  			goto out_free;
1768  		goto restart;
1769  	}
1770  
1771  	err = -EPIPE;
1772  	if (other->sk_shutdown & RCV_SHUTDOWN)
1773  		goto out_unlock;
1774  
1775  	if (sk->sk_type != SOCK_SEQPACKET) {
1776  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1777  		if (err)
1778  			goto out_unlock;
1779  	}
1780  
1781  	/* other == sk && unix_peer(other) != sk if
1782  	 * - unix_peer(sk) == NULL, destination address bound to sk
1783  	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1784  	 */
1785  	if (other != sk &&
1786  	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1787  		if (timeo) {
1788  			timeo = unix_wait_for_peer(other, timeo);
1789  
1790  			err = sock_intr_errno(timeo);
1791  			if (signal_pending(current))
1792  				goto out_free;
1793  
1794  			goto restart;
1795  		}
1796  
1797  		if (!sk_locked) {
1798  			unix_state_unlock(other);
1799  			unix_state_double_lock(sk, other);
1800  		}
1801  
1802  		if (unix_peer(sk) != other ||
1803  		    unix_dgram_peer_wake_me(sk, other)) {
1804  			err = -EAGAIN;
1805  			sk_locked = 1;
1806  			goto out_unlock;
1807  		}
1808  
1809  		if (!sk_locked) {
1810  			sk_locked = 1;
1811  			goto restart_locked;
1812  		}
1813  	}
1814  
1815  	if (unlikely(sk_locked))
1816  		unix_state_unlock(sk);
1817  
1818  	if (sock_flag(other, SOCK_RCVTSTAMP))
1819  		__net_timestamp(skb);
1820  	maybe_add_creds(skb, sock, other);
1821  	skb_queue_tail(&other->sk_receive_queue, skb);
1822  	if (max_level > unix_sk(other)->recursion_level)
1823  		unix_sk(other)->recursion_level = max_level;
1824  	unix_state_unlock(other);
1825  	other->sk_data_ready(other);
1826  	sock_put(other);
1827  	scm_destroy(&scm);
1828  	return len;
1829  
1830  out_unlock:
1831  	if (sk_locked)
1832  		unix_state_unlock(sk);
1833  	unix_state_unlock(other);
1834  out_free:
1835  	kfree_skb(skb);
1836  out:
1837  	if (other)
1838  		sock_put(other);
1839  	scm_destroy(&scm);
1840  	return err;
1841  }
1842  
1843  /* We use paged skbs for stream sockets, and limit occupancy to 32768
1844   * bytes, with a minimum of a full page.
1845   */
1846  #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
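/* Editorial note: with 4 KiB pages, get_order(32768) == 3, so
 * UNIX_SKB_FRAGS_SZ is 8 pages == 32768 bytes; on larger page sizes it
 * rounds up to a single page.
 */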
1847  
1848  static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1849  			       size_t len)
1850  {
1851  	struct sock *sk = sock->sk;
1852  	struct sock *other = NULL;
1853  	int err, size;
1854  	struct sk_buff *skb;
1855  	int sent = 0;
1856  	struct scm_cookie scm;
1857  	bool fds_sent = false;
1858  	int max_level;
1859  	int data_len;
1860  
1861  	wait_for_unix_gc();
1862  	err = scm_send(sock, msg, &scm, false);
1863  	if (err < 0)
1864  		return err;
1865  
1866  	err = -EOPNOTSUPP;
1867  	if (msg->msg_flags&MSG_OOB)
1868  		goto out_err;
1869  
1870  	if (msg->msg_namelen) {
1871  		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1872  		goto out_err;
1873  	} else {
1874  		err = -ENOTCONN;
1875  		other = unix_peer(sk);
1876  		if (!other)
1877  			goto out_err;
1878  	}
1879  
1880  	if (sk->sk_shutdown & SEND_SHUTDOWN)
1881  		goto pipe_err;
1882  
1883  	while (sent < len) {
1884  		size = len - sent;
1885  
1886  		/* Keep two messages in the pipe so it schedules better */
1887  		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1888  
1889  		/* allow fallback to order-0 allocations */
1890  		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1891  
1892  		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1893  
1894  		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1895  
1896  		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1897  					   msg->msg_flags & MSG_DONTWAIT, &err,
1898  					   get_order(UNIX_SKB_FRAGS_SZ));
1899  		if (!skb)
1900  			goto out_err;
1901  
1902  		/* Only send the fds in the first buffer */
1903  		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1904  		if (err < 0) {
1905  			kfree_skb(skb);
1906  			goto out_err;
1907  		}
1908  		max_level = err + 1;
1909  		fds_sent = true;
1910  
1911  		skb_put(skb, size - data_len);
1912  		skb->data_len = data_len;
1913  		skb->len = size;
1914  		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1915  		if (err) {
1916  			kfree_skb(skb);
1917  			goto out_err;
1918  		}
1919  
1920  		unix_state_lock(other);
1921  
1922  		if (sock_flag(other, SOCK_DEAD) ||
1923  		    (other->sk_shutdown & RCV_SHUTDOWN))
1924  			goto pipe_err_free;
1925  
1926  		maybe_add_creds(skb, sock, other);
1927  		skb_queue_tail(&other->sk_receive_queue, skb);
1928  		if (max_level > unix_sk(other)->recursion_level)
1929  			unix_sk(other)->recursion_level = max_level;
1930  		unix_state_unlock(other);
1931  		other->sk_data_ready(other);
1932  		sent += size;
1933  	}
1934  
1935  	scm_destroy(&scm);
1936  
1937  	return sent;
1938  
1939  pipe_err_free:
1940  	unix_state_unlock(other);
1941  	kfree_skb(skb);
1942  pipe_err:
1943  	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1944  		send_sig(SIGPIPE, current, 0);
1945  	err = -EPIPE;
1946  out_err:
1947  	scm_destroy(&scm);
1948  	return sent ? : err;
1949  }
1950  
1951  static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1952  				    int offset, size_t size, int flags)
1953  {
1954  	int err;
1955  	bool send_sigpipe = false;
1956  	bool init_scm = true;
1957  	struct scm_cookie scm;
1958  	struct sock *other, *sk = socket->sk;
1959  	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1960  
1961  	if (flags & MSG_OOB)
1962  		return -EOPNOTSUPP;
1963  
1964  	other = unix_peer(sk);
1965  	if (!other || sk->sk_state != TCP_ESTABLISHED)
1966  		return -ENOTCONN;
1967  
1968  	if (false) {	/* entered only via "goto alloc_skb" below */
1969  alloc_skb:
1970  		unix_state_unlock(other);
1971  		mutex_unlock(&unix_sk(other)->readlock);
1972  		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1973  					      &err, 0);
1974  		if (!newskb)
1975  			goto err;
1976  	}
1977  
1978  	/* we must acquire readlock as we modify already present
1979  	 * skbs in the sk_receive_queue and mess with skb->len
1980  	 */
1981  	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1982  	if (err) {
1983  		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1984  		goto err;
1985  	}
1986  
1987  	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1988  		err = -EPIPE;
1989  		send_sigpipe = true;
1990  		goto err_unlock;
1991  	}
1992  
1993  	unix_state_lock(other);
1994  
1995  	if (sock_flag(other, SOCK_DEAD) ||
1996  	    other->sk_shutdown & RCV_SHUTDOWN) {
1997  		err = -EPIPE;
1998  		send_sigpipe = true;
1999  		goto err_state_unlock;
2000  	}
2001  
2002  	if (init_scm) {
2003  		err = maybe_init_creds(&scm, socket, other);
2004  		if (err)
2005  			goto err_state_unlock;
2006  		init_scm = false;
2007  	}
2008  
2009  	skb = skb_peek_tail(&other->sk_receive_queue);
2010  	if (tail && tail == skb) {
2011  		skb = newskb;
2012  	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2013  		if (newskb) {
2014  			skb = newskb;
2015  		} else {
2016  			tail = skb;
2017  			goto alloc_skb;
2018  		}
2019  	} else if (newskb) {
2020  		/* This is the fast path: no separate NULL check is
2021  		 * needed before freeing - consume_skb() does no harm
2022  		 * even when newskb is NULL.
2023  		 */
2024  		consume_skb(newskb);
2025  		newskb = NULL;
2026  	}
2027  
2028  	if (skb_append_pagefrags(skb, page, offset, size)) {
2029  		tail = skb;
2030  		goto alloc_skb;
2031  	}
2032  
2033  	skb->len += size;
2034  	skb->data_len += size;
2035  	skb->truesize += size;
2036  	atomic_add(size, &sk->sk_wmem_alloc);
2037  
2038  	if (newskb) {
2039  		err = unix_scm_to_skb(&scm, skb, false);
2040  		if (err)
2041  			goto err_state_unlock;
2042  		spin_lock(&other->sk_receive_queue.lock);
2043  		__skb_queue_tail(&other->sk_receive_queue, newskb);
2044  		spin_unlock(&other->sk_receive_queue.lock);
2045  	}
2046  
2047  	unix_state_unlock(other);
2048  	mutex_unlock(&unix_sk(other)->readlock);
2049  
2050  	other->sk_data_ready(other);
2051  	scm_destroy(&scm);
2052  	return size;
2053  
2054  err_state_unlock:
2055  	unix_state_unlock(other);
2056  err_unlock:
2057  	mutex_unlock(&unix_sk(other)->readlock);
2058  err:
2059  	kfree_skb(newskb);
2060  	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2061  		send_sig(SIGPIPE, current, 0);
2062  	if (!init_scm)
2063  		scm_destroy(&scm);
2064  	return err;
2065  }
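
/* Illustrative userspace sketch (not kernel code; fds are hypothetical):
 * unix_stream_sendpage() is reached via splice(2) or sendfile(2) on a
 * connected stream socket:
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	splice(pipe_rd, NULL, sv[0], NULL, 65536, SPLICE_F_NONBLOCK);
 *
 * Each spliced page arrives here as one call; when the scm state of the
 * queue tail matches, the page is appended to that existing skb instead
 * of allocating a new one.
 */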
2066  
2067  static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2068  				  size_t len)
2069  {
2070  	int err;
2071  	struct sock *sk = sock->sk;
2072  
2073  	err = sock_error(sk);
2074  	if (err)
2075  		return err;
2076  
2077  	if (sk->sk_state != TCP_ESTABLISHED)
2078  		return -ENOTCONN;
2079  
2080  	if (msg->msg_namelen)
2081  		msg->msg_namelen = 0;
2082  
2083  	return unix_dgram_sendmsg(sock, msg, len);
2084  }
2085  
2086  static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2087  				  size_t size, int flags)
2088  {
2089  	struct sock *sk = sock->sk;
2090  
2091  	if (sk->sk_state != TCP_ESTABLISHED)
2092  		return -ENOTCONN;
2093  
2094  	return unix_dgram_recvmsg(sock, msg, size, flags);
2095  }
2096  
2097  static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2098  {
2099  	struct unix_sock *u = unix_sk(sk);
2100  
2101  	if (u->addr) {
2102  		msg->msg_namelen = u->addr->len;
2103  		memcpy(msg->msg_name, u->addr->name, u->addr->len);
2104  	}
2105  }
2106  
2107  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2108  			      size_t size, int flags)
2109  {
2110  	struct scm_cookie scm;
2111  	struct sock *sk = sock->sk;
2112  	struct unix_sock *u = unix_sk(sk);
2113  	struct sk_buff *skb, *last;
2114  	long timeo;
2115  	int err;
2116  	int peeked, skip;
2117  
2118  	err = -EOPNOTSUPP;
2119  	if (flags&MSG_OOB)
2120  		goto out;
2121  
2122  	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2123  
2124  	do {
2125  		mutex_lock(&u->readlock);
2126  
2127  		skip = sk_peek_offset(sk, flags);
2128  		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2129  					      &last);
2130  		if (skb)
2131  			break;
2132  
2133  		mutex_unlock(&u->readlock);
2134  
2135  		if (err != -EAGAIN)
2136  			break;
2137  	} while (timeo &&
2138  		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2139  
2140  	if (!skb) { /* implies readlock unlocked */
2141  		unix_state_lock(sk);
2142  		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2143  		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2144  		    (sk->sk_shutdown & RCV_SHUTDOWN))
2145  			err = 0;
2146  		unix_state_unlock(sk);
2147  		goto out;
2148  	}
2149  
2150  	if (wq_has_sleeper(&u->peer_wait))
2151  		wake_up_interruptible_sync_poll(&u->peer_wait,
2152  						POLLOUT | POLLWRNORM |
2153  						POLLWRBAND);
2154  
2155  	if (msg->msg_name)
2156  		unix_copy_addr(msg, skb->sk);
2157  
2158  	if (size > skb->len - skip)
2159  		size = skb->len - skip;
2160  	else if (size < skb->len - skip)
2161  		msg->msg_flags |= MSG_TRUNC;
2162  
2163  	err = skb_copy_datagram_msg(skb, skip, msg, size);
2164  	if (err)
2165  		goto out_free;
2166  
2167  	if (sock_flag(sk, SOCK_RCVTSTAMP))
2168  		__sock_recv_timestamp(msg, sk, skb);
2169  
2170  	memset(&scm, 0, sizeof(scm));
2171  
2172  	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2173  	unix_set_secdata(&scm, skb);
2174  
2175  	if (!(flags & MSG_PEEK)) {
2176  		if (UNIXCB(skb).fp)
2177  			unix_detach_fds(&scm, skb);
2178  
2179  		sk_peek_offset_bwd(sk, skb->len);
2180  	} else {
2181  		/* It is questionable: on PEEK we could:
2182  		   - not return fds - good, but too simple 8)
2183  		   - return fds, and not return them on read (old strategy,
2184  		     apparently wrong)
2185  		   - clone fds (chosen for now as the most universal
2186  		     solution)
2187  
2188  		   POSIX 1003.1g does not actually define this clearly
2189  		   at all. Then again, POSIX 1003.1g doesn't define a lot
2190  		   of things clearly!
2191  
2192  		*/
2193  
2194  		sk_peek_offset_fwd(sk, size);
2195  
2196  		if (UNIXCB(skb).fp)
2197  			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2198  	}
2199  	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2200  
2201  	scm_recv(sock, msg, &scm, flags);
2202  
2203  out_free:
2204  	skb_free_datagram(sk, skb);
2205  	mutex_unlock(&u->readlock);
2206  out:
2207  	return err;
2208  }
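
/* Illustrative userspace sketch (not kernel code; sizes hypothetical):
 * the clone-on-PEEK strategy above means an SCM_RIGHTS fd is duplicated
 * into the caller on every peek, and again on the final read:
 *
 *	char data[256], ctrl[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = ctrl,
 *			     .msg_controllen = sizeof(ctrl) };
 *	recvmsg(fd, &mh, MSG_PEEK);	-- receives a cloned fd
 *	recvmsg(fd, &mh, 0);		-- receives the fd again; skb freed
 */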
2209  
2210  /*
2211   *	Sleep until more data has arrived, but check for races.
2212   */
2213  static long unix_stream_data_wait(struct sock *sk, long timeo,
2214  				  struct sk_buff *last, unsigned int last_len)
2215  {
2216  	struct sk_buff *tail;
2217  	DEFINE_WAIT(wait);
2218  
2219  	unix_state_lock(sk);
2220  
2221  	for (;;) {
2222  		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2223  
2224  		tail = skb_peek_tail(&sk->sk_receive_queue);
2225  		if (tail != last ||
2226  		    (tail && tail->len != last_len) ||
2227  		    sk->sk_err ||
2228  		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2229  		    signal_pending(current) ||
2230  		    !timeo)
2231  			break;
2232  
2233  		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2234  		unix_state_unlock(sk);
2235  		timeo = freezable_schedule_timeout(timeo);
2236  		unix_state_lock(sk);
2237  
2238  		if (sock_flag(sk, SOCK_DEAD))
2239  			break;
2240  
2241  		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2242  	}
2243  
2244  	finish_wait(sk_sleep(sk), &wait);
2245  	unix_state_unlock(sk);
2246  	return timeo;
2247  }
2248  
2249  static unsigned int unix_skb_len(const struct sk_buff *skb)
2250  {
2251  	return skb->len - UNIXCB(skb).consumed;
2252  }
2253  
2254  struct unix_stream_read_state {
2255  	int (*recv_actor)(struct sk_buff *, int, int,
2256  			  struct unix_stream_read_state *);
2257  	struct socket *socket;
2258  	struct msghdr *msg;
2259  	struct pipe_inode_info *pipe;
2260  	size_t size;
2261  	int flags;
2262  	unsigned int splice_flags;
2263  };
2264  
2265  static int unix_stream_read_generic(struct unix_stream_read_state *state)
2266  {
2267  	struct scm_cookie scm;
2268  	struct socket *sock = state->socket;
2269  	struct sock *sk = sock->sk;
2270  	struct unix_sock *u = unix_sk(sk);
2271  	int copied = 0;
2272  	int flags = state->flags;
2273  	int noblock = flags & MSG_DONTWAIT;
2274  	bool check_creds = false;
2275  	int target;
2276  	int err = 0;
2277  	long timeo;
2278  	int skip;
2279  	size_t size = state->size;
2280  	unsigned int last_len;
2281  
2282  	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2283  		err = -EINVAL;
2284  		goto out;
2285  	}
2286  
2287  	if (unlikely(flags & MSG_OOB)) {
2288  		err = -EOPNOTSUPP;
2289  		goto out;
2290  	}
2291  
2292  	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2293  	timeo = sock_rcvtimeo(sk, noblock);
2294  
2295  	memset(&scm, 0, sizeof(scm));
2296  
2297  	/* Lock the socket to prevent queue reordering while we
2298  	 * sleep copying data out to the message
2299  	 */
2300  	mutex_lock(&u->readlock);
2301  
2302  	if (flags & MSG_PEEK)
2303  		skip = sk_peek_offset(sk, flags);
2304  	else
2305  		skip = 0;
2306  
2307  	do {
2308  		int chunk;
2309  		bool drop_skb;
2310  		struct sk_buff *skb, *last;
2311  
2312  redo:
2313  		unix_state_lock(sk);
2314  		if (sock_flag(sk, SOCK_DEAD)) {
2315  			err = -ECONNRESET;
2316  			goto unlock;
2317  		}
2318  		last = skb = skb_peek(&sk->sk_receive_queue);
2319  		last_len = last ? last->len : 0;
2320  again:
2321  		if (skb == NULL) {
2322  			unix_sk(sk)->recursion_level = 0;
2323  			if (copied >= target)
2324  				goto unlock;
2325  
2326  			/*
2327  			 *	POSIX 1003.1g mandates this order.
2328  			 */
2329  
2330  			err = sock_error(sk);
2331  			if (err)
2332  				goto unlock;
2333  			if (sk->sk_shutdown & RCV_SHUTDOWN)
2334  				goto unlock;
2335  
2336  			unix_state_unlock(sk);
2337  			if (!timeo) {
2338  				err = -EAGAIN;
2339  				break;
2340  			}
2341  
2342  			mutex_unlock(&u->readlock);
2343  
2344  			timeo = unix_stream_data_wait(sk, timeo, last,
2345  						      last_len);
2346  
2347  			if (signal_pending(current)) {
2348  				err = sock_intr_errno(timeo);
2349  				scm_destroy(&scm);
2350  				goto out;
2351  			}
2352  
2353  			mutex_lock(&u->readlock);
2354  			goto redo;
2355  unlock:
2356  			unix_state_unlock(sk);
2357  			break;
2358  		}
2359  
2360  		while (skip >= unix_skb_len(skb)) {
2361  			skip -= unix_skb_len(skb);
2362  			last = skb;
2363  			last_len = skb->len;
2364  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2365  			if (!skb)
2366  				goto again;
2367  		}
2368  
2369  		unix_state_unlock(sk);
2370  
2371  		if (check_creds) {
2372  			/* Never glue messages from different writers */
2373  			if (!unix_skb_scm_eq(skb, &scm))
2374  				break;
2375  		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2376  			/* Copy credentials */
2377  			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2378  			unix_set_secdata(&scm, skb);
2379  			check_creds = true;
2380  		}
2381  
2382  		/* Copy address just once */
2383  		if (state->msg && state->msg->msg_name) {
2384  			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2385  					 state->msg->msg_name);
2386  			unix_copy_addr(state->msg, skb->sk);
2387  			sunaddr = NULL;
2388  		}
2389  
2390  		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2391  		skb_get(skb);
2392  		chunk = state->recv_actor(skb, skip, chunk, state);
2393  		drop_skb = !unix_skb_len(skb);
2394  		/* skb is only safe to use if !drop_skb */
2395  		consume_skb(skb);
2396  		if (chunk < 0) {
2397  			if (copied == 0)
2398  				copied = -EFAULT;
2399  			break;
2400  		}
2401  		copied += chunk;
2402  		size -= chunk;
2403  
2404  		if (drop_skb) {
2405  			/* the skb was touched by a concurrent reader;
2406  			 * expect nothing more from it and treat it as
2407  			 * invalid - we can be sure it was dropped from
2408  			 * the socket queue
2409  			 *
2410  			 * let's report a short read
2411  			 */
2412  			err = 0;
2413  			break;
2414  		}
2415  
2416  		/* Mark read part of skb as used */
2417  		if (!(flags & MSG_PEEK)) {
2418  			UNIXCB(skb).consumed += chunk;
2419  
2420  			sk_peek_offset_bwd(sk, chunk);
2421  
2422  			if (UNIXCB(skb).fp)
2423  				unix_detach_fds(&scm, skb);
2424  
2425  			if (unix_skb_len(skb))
2426  				break;
2427  
2428  			skb_unlink(skb, &sk->sk_receive_queue);
2429  			consume_skb(skb);
2430  
2431  			if (scm.fp)
2432  				break;
2433  		} else {
2434  			/* It is questionable, see note in unix_dgram_recvmsg.
2435  			 */
2436  			if (UNIXCB(skb).fp)
2437  				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2438  
2439  			sk_peek_offset_fwd(sk, chunk);
2440  
2441  			if (UNIXCB(skb).fp)
2442  				break;
2443  
2444  			skip = 0;
2445  			last = skb;
2446  			last_len = skb->len;
2447  			unix_state_lock(sk);
2448  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2449  			if (skb)
2450  				goto again;
2451  			unix_state_unlock(sk);
2452  			break;
2453  		}
2454  	} while (size);
2455  
2456  	mutex_unlock(&u->readlock);
2457  	if (state->msg)
2458  		scm_recv(sock, state->msg, &scm, flags);
2459  	else
2460  		scm_destroy(&scm);
2461  out:
2462  	return copied ? : err;
2463  }
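
/* Illustrative userspace sketch (not kernel code): the sk_peek_offset
 * handling above lets repeated peeks walk forward through the stream,
 * while a normal read rewinds the offset by the bytes it consumed:
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &(int){ 0 }, sizeof(int));
 *	recv(fd, buf, 100, MSG_PEEK);	-- bytes 0..99, offset -> 100
 *	recv(fd, buf, 100, MSG_PEEK);	-- bytes 100..199, offset -> 200
 *	recv(fd, buf, 100, 0);		-- consumes 0..99, offset -> 100
 */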
2464  
2465  static int unix_stream_read_actor(struct sk_buff *skb,
2466  				  int skip, int chunk,
2467  				  struct unix_stream_read_state *state)
2468  {
2469  	int ret;
2470  
2471  	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2472  				    state->msg, chunk);
2473  	return ret ?: chunk;
2474  }
2475  
2476  static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2477  			       size_t size, int flags)
2478  {
2479  	struct unix_stream_read_state state = {
2480  		.recv_actor = unix_stream_read_actor,
2481  		.socket = sock,
2482  		.msg = msg,
2483  		.size = size,
2484  		.flags = flags
2485  	};
2486  
2487  	return unix_stream_read_generic(&state);
2488  }
2489  
2490  static ssize_t skb_unix_socket_splice(struct sock *sk,
2491  				      struct pipe_inode_info *pipe,
2492  				      struct splice_pipe_desc *spd)
2493  {
2494  	int ret;
2495  	struct unix_sock *u = unix_sk(sk);
2496  
2497  	mutex_unlock(&u->readlock);
2498  	ret = splice_to_pipe(pipe, spd);
2499  	mutex_lock(&u->readlock);
2500  
2501  	return ret;
2502  }
2503  
2504  static int unix_stream_splice_actor(struct sk_buff *skb,
2505  				    int skip, int chunk,
2506  				    struct unix_stream_read_state *state)
2507  {
2508  	return skb_splice_bits(skb, state->socket->sk,
2509  			       UNIXCB(skb).consumed + skip,
2510  			       state->pipe, chunk, state->splice_flags,
2511  			       skb_unix_socket_splice);
2512  }
2513  
2514  static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2515  				       struct pipe_inode_info *pipe,
2516  				       size_t size, unsigned int flags)
2517  {
2518  	struct unix_stream_read_state state = {
2519  		.recv_actor = unix_stream_splice_actor,
2520  		.socket = sock,
2521  		.pipe = pipe,
2522  		.size = size,
2523  		.splice_flags = flags,
2524  	};
2525  
2526  	if (unlikely(*ppos))
2527  		return -ESPIPE;
2528  
2529  	if (sock->file->f_flags & O_NONBLOCK ||
2530  	    flags & SPLICE_F_NONBLOCK)
2531  		state.flags = MSG_DONTWAIT;
2532  
2533  	return unix_stream_read_generic(&state);
2534  }
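
/* Illustrative userspace sketch (not kernel code; fds hypothetical):
 * splicing out of a stream socket into a pipe drives the splice actor
 * above.  SPLICE_F_NONBLOCK (or O_NONBLOCK on the socket) maps to
 * MSG_DONTWAIT, so an empty queue yields -EAGAIN instead of sleeping:
 *
 *	int p[2];
 *	pipe(p);
 *	ssize_t n = splice(sock_fd, NULL, p[1], NULL, 4096,
 *			   SPLICE_F_NONBLOCK);
 */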
2535  
2536  static int unix_shutdown(struct socket *sock, int mode)
2537  {
2538  	struct sock *sk = sock->sk;
2539  	struct sock *other;
2540  
2541  	if (mode < SHUT_RD || mode > SHUT_RDWR)
2542  		return -EINVAL;
2543  	/* This maps:
2544  	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2545  	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2546  	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2547  	 */
2548  	++mode;
2549  
2550  	unix_state_lock(sk);
2551  	sk->sk_shutdown |= mode;
2552  	other = unix_peer(sk);
2553  	if (other)
2554  		sock_hold(other);
2555  	unix_state_unlock(sk);
2556  	sk->sk_state_change(sk);
2557  
2558  	if (other &&
2559  		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2560  
2561  		int peer_mode = 0;
2562  
2563  		if (mode&RCV_SHUTDOWN)
2564  			peer_mode |= SEND_SHUTDOWN;
2565  		if (mode&SEND_SHUTDOWN)
2566  			peer_mode |= RCV_SHUTDOWN;
2567  		unix_state_lock(other);
2568  		other->sk_shutdown |= peer_mode;
2569  		unix_state_unlock(other);
2570  		other->sk_state_change(other);
2571  		if (peer_mode == SHUTDOWN_MASK)
2572  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2573  		else if (peer_mode & RCV_SHUTDOWN)
2574  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2575  	}
2576  	if (other)
2577  		sock_put(other);
2578  
2579  	return 0;
2580  }
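
/* Illustrative userspace sketch (not kernel code): a half-close on a
 * stream pair.  Because the shutdown is mirrored onto the peer above,
 * the reader sees EOF instead of blocking:
 *
 *	int sv[2];
 *	char buf[64];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);		-- sv[0] gets SEND_SHUTDOWN
 *	read(sv[1], buf, sizeof(buf));		-- returns 0 (EOF)
 */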
2581  
2582  long unix_inq_len(struct sock *sk)
2583  {
2584  	struct sk_buff *skb;
2585  	long amount = 0;
2586  
2587  	if (sk->sk_state == TCP_LISTEN)
2588  		return -EINVAL;
2589  
2590  	spin_lock(&sk->sk_receive_queue.lock);
2591  	if (sk->sk_type == SOCK_STREAM ||
2592  	    sk->sk_type == SOCK_SEQPACKET) {
2593  		skb_queue_walk(&sk->sk_receive_queue, skb)
2594  			amount += unix_skb_len(skb);
2595  	} else {
2596  		skb = skb_peek(&sk->sk_receive_queue);
2597  		if (skb)
2598  			amount = skb->len;
2599  	}
2600  	spin_unlock(&sk->sk_receive_queue.lock);
2601  
2602  	return amount;
2603  }
2604  EXPORT_SYMBOL_GPL(unix_inq_len);
2605  
2606  long unix_outq_len(struct sock *sk)
2607  {
2608  	return sk_wmem_alloc_get(sk);
2609  }
2610  EXPORT_SYMBOL_GPL(unix_outq_len);
2611  
2612  static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2613  {
2614  	struct sock *sk = sock->sk;
2615  	long amount = 0;
2616  	int err;
2617  
2618  	switch (cmd) {
2619  	case SIOCOUTQ:
2620  		amount = unix_outq_len(sk);
2621  		err = put_user(amount, (int __user *)arg);
2622  		break;
2623  	case SIOCINQ:
2624  		amount = unix_inq_len(sk);
2625  		if (amount < 0)
2626  			err = amount;
2627  		else
2628  			err = put_user(amount, (int __user *)arg);
2629  		break;
2630  	default:
2631  		err = -ENOIOCTLCMD;
2632  		break;
2633  	}
2634  	return err;
2635  }
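
/* Illustrative userspace sketch (not kernel code): querying queue
 * occupancy through the ioctls handled above:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int unread, unsent;
 *	ioctl(fd, SIOCINQ, &unread);	-- bytes queued for reading
 *	ioctl(fd, SIOCOUTQ, &unsent);	-- bytes not yet read by the peer
 */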
2636  
2637  static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2638  {
2639  	struct sock *sk = sock->sk;
2640  	unsigned int mask;
2641  
2642  	sock_poll_wait(file, sk_sleep(sk), wait);
2643  	mask = 0;
2644  
2645  	/* exceptional events? */
2646  	if (sk->sk_err)
2647  		mask |= POLLERR;
2648  	if (sk->sk_shutdown == SHUTDOWN_MASK)
2649  		mask |= POLLHUP;
2650  	if (sk->sk_shutdown & RCV_SHUTDOWN)
2651  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2652  
2653  	/* readable? */
2654  	if (!skb_queue_empty(&sk->sk_receive_queue))
2655  		mask |= POLLIN | POLLRDNORM;
2656  
2657  	/* Connection-based need to check for termination and startup */
2658  	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2659  	    sk->sk_state == TCP_CLOSE)
2660  		mask |= POLLHUP;
2661  
2662  	/*
2663  	 * we set writable also when the other side has shut down the
2664  	 * connection. This prevents stuck sockets.
2665  	 */
2666  	if (unix_writable(sk))
2667  		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2668  
2669  	return mask;
2670  }
2671  
2672  static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2673  				    poll_table *wait)
2674  {
2675  	struct sock *sk = sock->sk, *other;
2676  	unsigned int mask, writable;
2677  
2678  	sock_poll_wait(file, sk_sleep(sk), wait);
2679  	mask = 0;
2680  
2681  	/* exceptional events? */
2682  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2683  		mask |= POLLERR |
2684  			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2685  
2686  	if (sk->sk_shutdown & RCV_SHUTDOWN)
2687  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2688  	if (sk->sk_shutdown == SHUTDOWN_MASK)
2689  		mask |= POLLHUP;
2690  
2691  	/* readable? */
2692  	if (!skb_queue_empty(&sk->sk_receive_queue))
2693  		mask |= POLLIN | POLLRDNORM;
2694  
2695  	/* Connection-based need to check for termination and startup */
2696  	if (sk->sk_type == SOCK_SEQPACKET) {
2697  		if (sk->sk_state == TCP_CLOSE)
2698  			mask |= POLLHUP;
2699  		/* connection hasn't started yet? */
2700  		if (sk->sk_state == TCP_SYN_SENT)
2701  			return mask;
2702  	}
2703  
2704  	/* No write status requested, avoid expensive OUT tests. */
2705  	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2706  		return mask;
2707  
2708  	writable = unix_writable(sk);
2709  	if (writable) {
2710  		unix_state_lock(sk);
2711  
2712  		other = unix_peer(sk);
2713  		if (other && unix_peer(other) != sk &&
2714  		    unix_recvq_full(other) &&
2715  		    unix_dgram_peer_wake_me(sk, other))
2716  			writable = 0;
2717  
2718  		unix_state_unlock(sk);
2719  	}
2720  
2721  	if (writable)
2722  		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2723  	else
2724  		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2725  
2726  	return mask;
2727  }
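
/* Illustrative userspace sketch (not kernel code): waiting for datagram
 * writability exercises the peer-wake test above.  Note the send can
 * still hit -EAGAIN: another sender may refill the peer's queue between
 * the poll() and the send():
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
 *		send(fd, buf, len, MSG_DONTWAIT);
 */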
2728  
2729  #ifdef CONFIG_PROC_FS
2730  
2731  #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2732  
2733  #define get_bucket(x) ((x) >> BUCKET_SPACE)
2734  #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2735  #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
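
/* Worked example (illustrative, assuming a 64-bit kernel and
 * UNIX_HASH_BITS == 8): BUCKET_SPACE is 64 - 9 - 1 = 54, so a seq
 * position packs the bucket into the top bits with a 1-based in-bucket
 * offset below it:
 *
 *	pos = set_bucket_offset(3, 7);	-- (3UL << 54) | 7
 *	get_bucket(pos) == 3
 *	get_offset(pos) == 7
 */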
2736  
2737  static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2738  {
2739  	unsigned long offset = get_offset(*pos);
2740  	unsigned long bucket = get_bucket(*pos);
2741  	struct sock *sk;
2742  	unsigned long count = 0;
2743  
2744  	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2745  		if (sock_net(sk) != seq_file_net(seq))
2746  			continue;
2747  		if (++count == offset)
2748  			break;
2749  	}
2750  
2751  	return sk;
2752  }
2753  
2754  static struct sock *unix_next_socket(struct seq_file *seq,
2755  				     struct sock *sk,
2756  				     loff_t *pos)
2757  {
2758  	unsigned long bucket;
2759  
2760  	while (sk > (struct sock *)SEQ_START_TOKEN) {
2761  		sk = sk_next(sk);
2762  		if (!sk)
2763  			goto next_bucket;
2764  		if (sock_net(sk) == seq_file_net(seq))
2765  			return sk;
2766  	}
2767  
2768  	do {
2769  		sk = unix_from_bucket(seq, pos);
2770  		if (sk)
2771  			return sk;
2772  
2773  next_bucket:
2774  		bucket = get_bucket(*pos) + 1;
2775  		*pos = set_bucket_offset(bucket, 1);
2776  	} while (bucket < ARRAY_SIZE(unix_socket_table));
2777  
2778  	return NULL;
2779  }
2780  
2781  static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2782  	__acquires(unix_table_lock)
2783  {
2784  	spin_lock(&unix_table_lock);
2785  
2786  	if (!*pos)
2787  		return SEQ_START_TOKEN;
2788  
2789  	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2790  		return NULL;
2791  
2792  	return unix_next_socket(seq, NULL, pos);
2793  }
2794  
2795  static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2796  {
2797  	++*pos;
2798  	return unix_next_socket(seq, v, pos);
2799  }
2800  
2801  static void unix_seq_stop(struct seq_file *seq, void *v)
2802  	__releases(unix_table_lock)
2803  {
2804  	spin_unlock(&unix_table_lock);
2805  }
2806  
2807  static int unix_seq_show(struct seq_file *seq, void *v)
2808  {
2809  
2810  	if (v == SEQ_START_TOKEN)
2811  		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2812  			 "Inode Path\n");
2813  	else {
2814  		struct sock *s = v;
2815  		struct unix_sock *u = unix_sk(s);
2816  		unix_state_lock(s);
2817  
2818  		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2819  			s,
2820  			atomic_read(&s->sk_refcnt),
2821  			0,
2822  			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2823  			s->sk_type,
2824  			s->sk_socket ?
2825  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2826  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2827  			sock_i_ino(s));
2828  
2829  		if (u->addr) {
2830  			int i, len;
2831  			seq_putc(seq, ' ');
2832  
2833  			i = 0;
2834  			len = u->addr->len - sizeof(short);
2835  			if (!UNIX_ABSTRACT(s))
2836  				len--;
2837  			else {
2838  				seq_putc(seq, '@');
2839  				i++;
2840  			}
2841  			for ( ; i < len; i++)
2842  				seq_putc(seq, u->addr->name->sun_path[i]);
2843  		}
2844  		unix_state_unlock(s);
2845  		seq_putc(seq, '\n');
2846  	}
2847  
2848  	return 0;
2849  }
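
/* Illustrative sample of a resulting /proc/net/unix line (values are
 * hypothetical; the Protocol column is always printed as zero).  Here a
 * listening (__SO_ACCEPTCON) stream socket in state SS_UNCONNECTED:
 *
 *	ffff8800b8d6a400: 00000002 00000000 00010000 0001 01 16163 /run/foo.sock
 */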
2850  
2851  static const struct seq_operations unix_seq_ops = {
2852  	.start  = unix_seq_start,
2853  	.next   = unix_seq_next,
2854  	.stop   = unix_seq_stop,
2855  	.show   = unix_seq_show,
2856  };
2857  
2858  static int unix_seq_open(struct inode *inode, struct file *file)
2859  {
2860  	return seq_open_net(inode, file, &unix_seq_ops,
2861  			    sizeof(struct seq_net_private));
2862  }
2863  
2864  static const struct file_operations unix_seq_fops = {
2865  	.owner		= THIS_MODULE,
2866  	.open		= unix_seq_open,
2867  	.read		= seq_read,
2868  	.llseek		= seq_lseek,
2869  	.release	= seq_release_net,
2870  };
2871  
2872  #endif
2873  
2874  static const struct net_proto_family unix_family_ops = {
2875  	.family = PF_UNIX,
2876  	.create = unix_create,
2877  	.owner	= THIS_MODULE,
2878  };
2879  
2880  
2881  static int __net_init unix_net_init(struct net *net)
2882  {
2883  	int error = -ENOMEM;
2884  
2885  	net->unx.sysctl_max_dgram_qlen = 10;
2886  	if (unix_sysctl_register(net))
2887  		goto out;
2888  
2889  #ifdef CONFIG_PROC_FS
2890  	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2891  		unix_sysctl_unregister(net);
2892  		goto out;
2893  	}
2894  #endif
2895  	error = 0;
2896  out:
2897  	return error;
2898  }
2899  
2900  static void __net_exit unix_net_exit(struct net *net)
2901  {
2902  	unix_sysctl_unregister(net);
2903  	remove_proc_entry("unix", net->proc_net);
2904  }
2905  
2906  static struct pernet_operations unix_net_ops = {
2907  	.init = unix_net_init,
2908  	.exit = unix_net_exit,
2909  };
2910  
2911  static int __init af_unix_init(void)
2912  {
2913  	int rc = -1;
2914  
2915  	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2916  
2917  	rc = proto_register(&unix_proto, 1);
2918  	if (rc != 0) {
2919  		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2920  		goto out;
2921  	}
2922  
2923  	sock_register(&unix_family_ops);
2924  	register_pernet_subsys(&unix_net_ops);
2925  out:
2926  	return rc;
2927  }
2928  
2929  static void __exit af_unix_exit(void)
2930  {
2931  	sock_unregister(PF_UNIX);
2932  	proto_unregister(&unix_proto);
2933  	unregister_pernet_subsys(&unix_net_ops);
2934  }
2935  
2936  /* Earlier than device_initcall() so that other drivers invoking
2937     request_module() don't end up in a loop when modprobe tries
2938     to use a UNIX socket. But later than subsys_initcall() because
2939     we depend on stuff initialised there */
2940  fs_initcall(af_unix_init);
2941  module_exit(af_unix_exit);
2942  
2943  MODULE_LICENSE("GPL");
2944  MODULE_ALIAS_NETPROTO(PF_UNIX);
2945