xref: /linux/net/unix/af_unix.c (revision 27cf5706a04e53f6844c71be1cbbf1df665f5d19)
1  /*
2   * NET4:	Implementation of BSD Unix domain sockets.
3   *
4   * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5   *
6   *		This program is free software; you can redistribute it and/or
7   *		modify it under the terms of the GNU General Public License
8   *		as published by the Free Software Foundation; either version
9   *		2 of the License, or (at your option) any later version.
10   *
11   * Fixes:
12   *		Linus Torvalds	:	Assorted bug cures.
13   *		Niibe Yutaka	:	async I/O support.
14   *		Carsten Paeth	:	PF_UNIX check, address fixes.
15   *		Alan Cox	:	Limit size of allocated blocks.
16   *		Alan Cox	:	Fixed the stupid socketpair bug.
17   *		Alan Cox	:	BSD compatibility fine tuning.
18   *		Alan Cox	:	Fixed a bug in connect when interrupted.
19   *		Alan Cox	:	Sorted out a proper draft version of
20   *					file descriptor passing hacked up from
21   *					Mike Shaver's work.
22   *		Marty Leisner	:	Fixes to fd passing
23   *		Nick Nevin	:	recvmsg bugfix.
24   *		Alan Cox	:	Started proper garbage collector
25   *		Heiko EiBfeldt	:	Missing verify_area check
26   *		Alan Cox	:	Started POSIXisms
27   *		Andreas Schwab	:	Replace inode by dentry for proper
28   *					reference counting
29   *		Kirk Petersen	:	Made this a module
30   *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31   *					Lots of bug fixes.
32   *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33   *					by the above two patches.
34   *	     Andrea Arcangeli	:	If possible we block in connect(2)
35   *					if the max backlog of the listen socket
36   *					has been reached. This won't break
37   *					old apps and it avoids a huge amount
38   *					of hashed socks (this for unix_gc()
39   *					performance reasons).
40   *					Security fix that limits the max
41   *					number of socks to 2*max_files and
42   *					the number of skbs queueable in the
43   *					dgram receiver.
44   *		Artur Skawina   :	Hash function optimizations
45   *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
46   *	      Malcolm Beattie   :	Set peercred for socketpair
47   *	     Michal Ostrowski   :       Module initialization cleanup.
48   *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49   *	     				the core infrastructure is doing that
50   *	     				for all net proto families now (2.5.69+)
51   *
52   *
53   * Known differences from reference BSD that was tested:
54   *
55   *	[TO FIX]
56   *	ECONNREFUSED is not returned from one end of a connected socket to the
57   *		other the moment one end closes.
58   *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
59   *		mark and a fake inode identifier (nor the BSD first socket fstat twice bug).
60   *	[NOT TO FIX]
61   *	accept() returns a path name even if the connecting socket has closed
62   *		in the meantime (BSD loses the path and gives up).
63   *	accept() returns 0 length path for an unbound connector. BSD returns 16
64   *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65   *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66   *	BSD af_unix apparently has connect forgetting to block properly.
67   *		(need to check this with the POSIX spec in detail)
68   *
69   * Differences from 2.0.0-11-... (ANK)
70   *	Bug fixes and improvements.
71   *		- client shutdown killed server socket.
72   *		- removed all useless cli/sti pairs.
73   *
74   *	Semantic changes/extensions.
75   *		- generic control message passing.
76   *		- SCM_CREDENTIALS control message.
77   *		- "Abstract" (not FS based) socket bindings.
78   *		  Abstract names are sequences of bytes (not zero terminated)
79   *		  starting with a zero byte, so that this name space does
80   *		  not intersect with BSD names.
81   */
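/*
 * For illustration only (a minimal userspace sketch, not part of the
 * original file): binding an abstract name. The leading zero byte in
 * sun_path marks the name as abstract; the name "example" is arbitrary.
 *
 *	struct sockaddr_un sun;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memset(&sun, 0, sizeof(sun));	// sun_path[0] stays '\0'
 *	sun.sun_family = AF_UNIX;
 *	memcpy(sun.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */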
82  
83  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84  
85  #include <linux/module.h>
86  #include <linux/kernel.h>
87  #include <linux/signal.h>
88  #include <linux/sched.h>
89  #include <linux/errno.h>
90  #include <linux/string.h>
91  #include <linux/stat.h>
92  #include <linux/dcache.h>
93  #include <linux/namei.h>
94  #include <linux/socket.h>
95  #include <linux/un.h>
96  #include <linux/fcntl.h>
97  #include <linux/termios.h>
98  #include <linux/sockios.h>
99  #include <linux/net.h>
100  #include <linux/in.h>
101  #include <linux/fs.h>
102  #include <linux/slab.h>
103  #include <linux/uaccess.h>
104  #include <linux/skbuff.h>
105  #include <linux/netdevice.h>
106  #include <net/net_namespace.h>
107  #include <net/sock.h>
108  #include <net/tcp_states.h>
109  #include <net/af_unix.h>
110  #include <linux/proc_fs.h>
111  #include <linux/seq_file.h>
112  #include <net/scm.h>
113  #include <linux/init.h>
114  #include <linux/poll.h>
115  #include <linux/rtnetlink.h>
116  #include <linux/mount.h>
117  #include <net/checksum.h>
118  #include <linux/security.h>
119  #include <linux/freezer.h>
120  
121  struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122  EXPORT_SYMBOL_GPL(unix_socket_table);
123  DEFINE_SPINLOCK(unix_table_lock);
124  EXPORT_SYMBOL_GPL(unix_table_lock);
125  static atomic_long_t unix_nr_socks;
126  
127  
128  static struct hlist_head *unix_sockets_unbound(void *addr)
129  {
130  	unsigned long hash = (unsigned long)addr;
131  
132  	hash ^= hash >> 16;
133  	hash ^= hash >> 8;
134  	hash %= UNIX_HASH_SIZE;
135  	return &unix_socket_table[UNIX_HASH_SIZE + hash];
136  }
137  
138  #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
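/* Bound abstract sockets record a real table slot (< UNIX_HASH_SIZE)
 * in addr->hash; filesystem-bound sockets store the sentinel value
 * UNIX_HASH_SIZE instead (see unix_bind()), which is what this tests.
 */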
139  
140  #ifdef CONFIG_SECURITY_NETWORK
141  static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142  {
143  	UNIXCB(skb).secid = scm->secid;
144  }
145  
146  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147  {
148  	scm->secid = UNIXCB(skb).secid;
149  }
150  
151  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
152  {
153  	return (scm->secid == UNIXCB(skb).secid);
154  }
155  #else
156  static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
157  { }
158  
159  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
160  { }
161  
162  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
163  {
164  	return true;
165  }
166  #endif /* CONFIG_SECURITY_NETWORK */
167  
168  /*
169   *  SMP locking strategy:
170   *    the hash table is protected by the unix_table_lock spinlock;
171   *    each socket's state is protected by its own spin lock.
172   */
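/*
 * Illustrative pattern (mirrors the helpers below, not new API):
 *
 *	spin_lock(&unix_table_lock);
 *	__unix_insert_socket(list, sk);
 *	spin_unlock(&unix_table_lock);
 *
 * while a socket's own connection state is guarded by
 * unix_state_lock(sk)/unix_state_unlock(sk).
 */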
173  
174  static inline unsigned int unix_hash_fold(__wsum n)
175  {
176  	unsigned int hash = (__force unsigned int)csum_fold(n);
177  
178  	hash ^= hash>>8;
179  	return hash&(UNIX_HASH_SIZE-1);
180  }
181  
182  #define unix_peer(sk) (unix_sk(sk)->peer)
183  
184  static inline int unix_our_peer(struct sock *sk, struct sock *osk)
185  {
186  	return unix_peer(osk) == sk;
187  }
188  
189  static inline int unix_may_send(struct sock *sk, struct sock *osk)
190  {
191  	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
192  }
193  
194  static inline int unix_recvq_full(struct sock const *sk)
195  {
196  	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
197  }
198  
199  struct sock *unix_peer_get(struct sock *s)
200  {
201  	struct sock *peer;
202  
203  	unix_state_lock(s);
204  	peer = unix_peer(s);
205  	if (peer)
206  		sock_hold(peer);
207  	unix_state_unlock(s);
208  	return peer;
209  }
210  EXPORT_SYMBOL_GPL(unix_peer_get);
211  
212  static inline void unix_release_addr(struct unix_address *addr)
213  {
214  	if (atomic_dec_and_test(&addr->refcnt))
215  		kfree(addr);
216  }
217  
218  /*
219   *	Check unix socket name:
220   *		- it should not be of zero length.
221   *	        - if it does not start with zero, it should be NUL terminated (FS object)
222   *		- if it starts with zero, it is an abstract name.
223   */
224  
225  static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
226  {
227  	if (len <= sizeof(short) || len > sizeof(*sunaddr))
228  		return -EINVAL;
229  	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230  		return -EINVAL;
231  	if (sunaddr->sun_path[0]) {
232  		/*
233  		 * This may look like an off by one error but it is a bit more
234  		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235  		 * sun_path[108] doesn't as such exist.  However in kernel space
236  		 * we are guaranteed that it is a valid memory location in our
237  		 * kernel address buffer.
238  		 */
239  		((char *)sunaddr)[len] = 0;
240  		len = strlen(sunaddr->sun_path)+1+sizeof(short);
241  		return len;
242  	}
243  
244  	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
245  	return len;
246  }
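/*
 * Worked example (illustrative; assumes sizeof(short) == 2):
 *
 *	struct sockaddr_un sa = { .sun_family = AF_UNIX,
 *				  .sun_path   = "/tmp/sock" };
 *	len = unix_mkname(&sa, 2 + 10, &hash);
 *	(returns strlen("/tmp/sock") + 1 + 2 == 12; *hashp is untouched)
 *
 * An abstract name ("\0foo") instead returns the caller's len
 * unchanged and fills *hashp from a checksum over all len bytes.
 */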
247  
248  static void __unix_remove_socket(struct sock *sk)
249  {
250  	sk_del_node_init(sk);
251  }
252  
253  static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254  {
255  	WARN_ON(!sk_unhashed(sk));
256  	sk_add_node(sk, list);
257  }
258  
259  static inline void unix_remove_socket(struct sock *sk)
260  {
261  	spin_lock(&unix_table_lock);
262  	__unix_remove_socket(sk);
263  	spin_unlock(&unix_table_lock);
264  }
265  
266  static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267  {
268  	spin_lock(&unix_table_lock);
269  	__unix_insert_socket(list, sk);
270  	spin_unlock(&unix_table_lock);
271  }
272  
273  static struct sock *__unix_find_socket_byname(struct net *net,
274  					      struct sockaddr_un *sunname,
275  					      int len, int type, unsigned int hash)
276  {
277  	struct sock *s;
278  
279  	sk_for_each(s, &unix_socket_table[hash ^ type]) {
280  		struct unix_sock *u = unix_sk(s);
281  
282  		if (!net_eq(sock_net(s), net))
283  			continue;
284  
285  		if (u->addr->len == len &&
286  		    !memcmp(u->addr->name, sunname, len))
287  			goto found;
288  	}
289  	s = NULL;
290  found:
291  	return s;
292  }
293  
294  static inline struct sock *unix_find_socket_byname(struct net *net,
295  						   struct sockaddr_un *sunname,
296  						   int len, int type,
297  						   unsigned int hash)
298  {
299  	struct sock *s;
300  
301  	spin_lock(&unix_table_lock);
302  	s = __unix_find_socket_byname(net, sunname, len, type, hash);
303  	if (s)
304  		sock_hold(s);
305  	spin_unlock(&unix_table_lock);
306  	return s;
307  }
308  
309  static struct sock *unix_find_socket_byinode(struct inode *i)
310  {
311  	struct sock *s;
312  
313  	spin_lock(&unix_table_lock);
314  	sk_for_each(s,
315  		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316  		struct dentry *dentry = unix_sk(s)->path.dentry;
317  
318  		if (dentry && d_backing_inode(dentry) == i) {
319  			sock_hold(s);
320  			goto found;
321  		}
322  	}
323  	s = NULL;
324  found:
325  	spin_unlock(&unix_table_lock);
326  	return s;
327  }
328  
329  /* Support code for asymmetrically connected dgram sockets
330   *
331   * If a datagram socket is connected to a socket not itself connected
332   * to the first socket (eg, /dev/log), clients may only enqueue more
333   * messages if the present receive queue of the server socket is not
334   * "too large". This means there's a second writeability condition
335   * that poll and sendmsg need to test. The dgram recv code will do a
336   * wake up on the peer_wait wait queue of a socket upon reception of
337   * a datagram; this wake up needs to be propagated to sleeping would-be
338   * writers, since these might not have sent anything so far. This can't be
339   * accomplished via poll_wait because the lifetime of the server
340   * socket might be less than that of its clients if these break their
341   * association with it or if the server socket is closed while clients
342   * are still connected to it, and there's no way to inform "a polling
343   * implementation" that it should let go of a certain wait queue.
344   *
345   * In order to propagate a wake up, a wait_queue_t of the client
346   * socket is enqueued on the peer_wait queue of the server socket;
347   * its wake function does a wake_up on the ordinary client socket
348   * wait queue. This connection is established whenever a write (or
349   * poll for write) hits the flow control condition, and is broken
350   * when the association to the server socket is dissolved or after
351   * a wake up has been relayed.
352   */
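/*
 * Lifecycle sketch of the relay (summarising the helpers below):
 *
 *	sendmsg()/poll()   ->	unix_dgram_peer_wake_connect(sk, other)
 *				(hook sk's peer_wake entry onto other's
 *				 peer_wait queue)
 *	recvmsg() on other ->	wake_up on peer_wait
 *			   ->	unix_dgram_peer_wake_relay()
 *			   ->	wake_up on sk_sleep(sk) of the writer
 *	relay/reconnect    ->	unix_dgram_peer_wake_disconnect()
 */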
353  
354  static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
355  				      void *key)
356  {
357  	struct unix_sock *u;
358  	wait_queue_head_t *u_sleep;
359  
360  	u = container_of(q, struct unix_sock, peer_wake);
361  
362  	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363  			    q);
364  	u->peer_wake.private = NULL;
365  
366  	/* relaying can only happen while the wq still exists */
367  	u_sleep = sk_sleep(&u->sk);
368  	if (u_sleep)
369  		wake_up_interruptible_poll(u_sleep, key);
370  
371  	return 0;
372  }
373  
374  static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375  {
376  	struct unix_sock *u, *u_other;
377  	int rc;
378  
379  	u = unix_sk(sk);
380  	u_other = unix_sk(other);
381  	rc = 0;
382  	spin_lock(&u_other->peer_wait.lock);
383  
384  	if (!u->peer_wake.private) {
385  		u->peer_wake.private = other;
386  		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387  
388  		rc = 1;
389  	}
390  
391  	spin_unlock(&u_other->peer_wait.lock);
392  	return rc;
393  }
394  
395  static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396  					    struct sock *other)
397  {
398  	struct unix_sock *u, *u_other;
399  
400  	u = unix_sk(sk);
401  	u_other = unix_sk(other);
402  	spin_lock(&u_other->peer_wait.lock);
403  
404  	if (u->peer_wake.private == other) {
405  		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406  		u->peer_wake.private = NULL;
407  	}
408  
409  	spin_unlock(&u_other->peer_wait.lock);
410  }
411  
412  static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413  						   struct sock *other)
414  {
415  	unix_dgram_peer_wake_disconnect(sk, other);
416  	wake_up_interruptible_poll(sk_sleep(sk),
417  				   POLLOUT |
418  				   POLLWRNORM |
419  				   POLLWRBAND);
420  }
421  
422  /* preconditions:
423   *	- unix_peer(sk) == other
424   *	- association is stable
425   */
426  static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427  {
428  	int connected;
429  
430  	connected = unix_dgram_peer_wake_connect(sk, other);
431  
432  	if (unix_recvq_full(other))
433  		return 1;
434  
435  	if (connected)
436  		unix_dgram_peer_wake_disconnect(sk, other);
437  
438  	return 0;
439  }
440  
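/* A socket counts as writable while outstanding write allocations use
 * at most a quarter of the send buffer (wmem_alloc * 4 <= sndbuf).
 */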
441  static int unix_writable(const struct sock *sk)
442  {
443  	return sk->sk_state != TCP_LISTEN &&
444  	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
445  }
446  
447  static void unix_write_space(struct sock *sk)
448  {
449  	struct socket_wq *wq;
450  
451  	rcu_read_lock();
452  	if (unix_writable(sk)) {
453  		wq = rcu_dereference(sk->sk_wq);
454  		if (skwq_has_sleeper(wq))
455  			wake_up_interruptible_sync_poll(&wq->wait,
456  				POLLOUT | POLLWRNORM | POLLWRBAND);
457  		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
458  	}
459  	rcu_read_unlock();
460  }
461  
462  /* When a dgram socket disconnects (or changes its peer), we clear its
463   * receive queue of packets that arrived from the previous peer. First,
464   * this allows flow control based only on wmem_alloc; second, an sk
465   * connected to a peer may receive messages only from that peer. */
466  static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
467  {
468  	if (!skb_queue_empty(&sk->sk_receive_queue)) {
469  		skb_queue_purge(&sk->sk_receive_queue);
470  		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
471  
472  		/* If one link of a bidirectional dgram pipe is disconnected,
473  		 * we signal the error. Messages are lost. Do not do this
474  		 * when the peer was not connected to us.
475  		 */
476  		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
477  			other->sk_err = ECONNRESET;
478  			other->sk_error_report(other);
479  		}
480  	}
481  }
482  
483  static void unix_sock_destructor(struct sock *sk)
484  {
485  	struct unix_sock *u = unix_sk(sk);
486  
487  	skb_queue_purge(&sk->sk_receive_queue);
488  
489  	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
490  	WARN_ON(!sk_unhashed(sk));
491  	WARN_ON(sk->sk_socket);
492  	if (!sock_flag(sk, SOCK_DEAD)) {
493  		pr_info("Attempt to release alive unix socket: %p\n", sk);
494  		return;
495  	}
496  
497  	if (u->addr)
498  		unix_release_addr(u->addr);
499  
500  	atomic_long_dec(&unix_nr_socks);
501  	local_bh_disable();
502  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
503  	local_bh_enable();
504  #ifdef UNIX_REFCNT_DEBUG
505  	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
506  		atomic_long_read(&unix_nr_socks));
507  #endif
508  }
509  
510  static void unix_release_sock(struct sock *sk, int embrion)
511  {
512  	struct unix_sock *u = unix_sk(sk);
513  	struct path path;
514  	struct sock *skpair;
515  	struct sk_buff *skb;
516  	int state;
517  
518  	unix_remove_socket(sk);
519  
520  	/* Clear state */
521  	unix_state_lock(sk);
522  	sock_orphan(sk);
523  	sk->sk_shutdown = SHUTDOWN_MASK;
524  	path	     = u->path;
525  	u->path.dentry = NULL;
526  	u->path.mnt = NULL;
527  	state = sk->sk_state;
528  	sk->sk_state = TCP_CLOSE;
529  	unix_state_unlock(sk);
530  
531  	wake_up_interruptible_all(&u->peer_wait);
532  
533  	skpair = unix_peer(sk);
534  
535  	if (skpair != NULL) {
536  		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
537  			unix_state_lock(skpair);
538  			/* No more writes */
539  			skpair->sk_shutdown = SHUTDOWN_MASK;
540  			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
541  				skpair->sk_err = ECONNRESET;
542  			unix_state_unlock(skpair);
543  			skpair->sk_state_change(skpair);
544  			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
545  		}
546  
547  		unix_dgram_peer_wake_disconnect(sk, skpair);
548  		sock_put(skpair); /* It may now die */
549  		unix_peer(sk) = NULL;
550  	}
551  
552  	/* Try to flush out this socket. Throw out buffers at least */
553  
554  	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
555  		if (state == TCP_LISTEN)
556  			unix_release_sock(skb->sk, 1);
557  		/* passed fds are erased in the kfree_skb hook	      */
558  		UNIXCB(skb).consumed = skb->len;
559  		kfree_skb(skb);
560  	}
561  
562  	if (path.dentry)
563  		path_put(&path);
564  
565  	sock_put(sk);
566  
567  	/* ---- Socket is dead now and most probably destroyed ---- */
568  
569  	/*
570  	 * Fixme: BSD difference: In BSD all sockets connected to us get
571  	 *	  ECONNRESET and we die on the spot. In Linux we behave
572  	 *	  like files and pipes do and wait for the last
573  	 *	  dereference.
574  	 *
575  	 * Can't we simply set sock->err?
576  	 *
577  	 *	  What does the above comment talk about? --ANK(980817)
578  	 */
579  
580  	if (unix_tot_inflight)
581  		unix_gc();		/* Garbage collect fds */
582  }
583  
584  static void init_peercred(struct sock *sk)
585  {
586  	put_pid(sk->sk_peer_pid);
587  	if (sk->sk_peer_cred)
588  		put_cred(sk->sk_peer_cred);
589  	sk->sk_peer_pid  = get_pid(task_tgid(current));
590  	sk->sk_peer_cred = get_current_cred();
591  }
592  
593  static void copy_peercred(struct sock *sk, struct sock *peersk)
594  {
595  	put_pid(sk->sk_peer_pid);
596  	if (sk->sk_peer_cred)
597  		put_cred(sk->sk_peer_cred);
598  	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
599  	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
600  }
601  
602  static int unix_listen(struct socket *sock, int backlog)
603  {
604  	int err;
605  	struct sock *sk = sock->sk;
606  	struct unix_sock *u = unix_sk(sk);
607  	struct pid *old_pid = NULL;
608  
609  	err = -EOPNOTSUPP;
610  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
611  		goto out;	/* Only stream/seqpacket sockets accept */
612  	err = -EINVAL;
613  	if (!u->addr)
614  		goto out;	/* No listens on an unbound socket */
615  	unix_state_lock(sk);
616  	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
617  		goto out_unlock;
618  	if (backlog > sk->sk_max_ack_backlog)
619  		wake_up_interruptible_all(&u->peer_wait);
620  	sk->sk_max_ack_backlog	= backlog;
621  	sk->sk_state		= TCP_LISTEN;
622  	/* set credentials so connect can copy them */
623  	init_peercred(sk);
624  	err = 0;
625  
626  out_unlock:
627  	unix_state_unlock(sk);
628  	put_pid(old_pid);
629  out:
630  	return err;
631  }
632  
633  static int unix_release(struct socket *);
634  static int unix_bind(struct socket *, struct sockaddr *, int);
635  static int unix_stream_connect(struct socket *, struct sockaddr *,
636  			       int addr_len, int flags);
637  static int unix_socketpair(struct socket *, struct socket *);
638  static int unix_accept(struct socket *, struct socket *, int);
639  static int unix_getname(struct socket *, struct sockaddr *, int *, int);
640  static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
641  static unsigned int unix_dgram_poll(struct file *, struct socket *,
642  				    poll_table *);
643  static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644  static int unix_shutdown(struct socket *, int);
645  static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
646  static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
647  static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
648  				    size_t size, int flags);
649  static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
650  				       struct pipe_inode_info *, size_t size,
651  				       unsigned int flags);
652  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
653  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
654  static int unix_dgram_connect(struct socket *, struct sockaddr *,
655  			      int, int);
656  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
657  static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
658  				  int);
659  
660  static int unix_set_peek_off(struct sock *sk, int val)
661  {
662  	struct unix_sock *u = unix_sk(sk);
663  
664  	if (mutex_lock_interruptible(&u->iolock))
665  		return -EINTR;
666  
667  	sk->sk_peek_off = val;
668  	mutex_unlock(&u->iolock);
669  
670  	return 0;
671  }
672  
673  
674  static const struct proto_ops unix_stream_ops = {
675  	.family =	PF_UNIX,
676  	.owner =	THIS_MODULE,
677  	.release =	unix_release,
678  	.bind =		unix_bind,
679  	.connect =	unix_stream_connect,
680  	.socketpair =	unix_socketpair,
681  	.accept =	unix_accept,
682  	.getname =	unix_getname,
683  	.poll =		unix_poll,
684  	.ioctl =	unix_ioctl,
685  	.listen =	unix_listen,
686  	.shutdown =	unix_shutdown,
687  	.setsockopt =	sock_no_setsockopt,
688  	.getsockopt =	sock_no_getsockopt,
689  	.sendmsg =	unix_stream_sendmsg,
690  	.recvmsg =	unix_stream_recvmsg,
691  	.mmap =		sock_no_mmap,
692  	.sendpage =	unix_stream_sendpage,
693  	.splice_read =	unix_stream_splice_read,
694  	.set_peek_off =	unix_set_peek_off,
695  };
696  
697  static const struct proto_ops unix_dgram_ops = {
698  	.family =	PF_UNIX,
699  	.owner =	THIS_MODULE,
700  	.release =	unix_release,
701  	.bind =		unix_bind,
702  	.connect =	unix_dgram_connect,
703  	.socketpair =	unix_socketpair,
704  	.accept =	sock_no_accept,
705  	.getname =	unix_getname,
706  	.poll =		unix_dgram_poll,
707  	.ioctl =	unix_ioctl,
708  	.listen =	sock_no_listen,
709  	.shutdown =	unix_shutdown,
710  	.setsockopt =	sock_no_setsockopt,
711  	.getsockopt =	sock_no_getsockopt,
712  	.sendmsg =	unix_dgram_sendmsg,
713  	.recvmsg =	unix_dgram_recvmsg,
714  	.mmap =		sock_no_mmap,
715  	.sendpage =	sock_no_sendpage,
716  	.set_peek_off =	unix_set_peek_off,
717  };
718  
719  static const struct proto_ops unix_seqpacket_ops = {
720  	.family =	PF_UNIX,
721  	.owner =	THIS_MODULE,
722  	.release =	unix_release,
723  	.bind =		unix_bind,
724  	.connect =	unix_stream_connect,
725  	.socketpair =	unix_socketpair,
726  	.accept =	unix_accept,
727  	.getname =	unix_getname,
728  	.poll =		unix_dgram_poll,
729  	.ioctl =	unix_ioctl,
730  	.listen =	unix_listen,
731  	.shutdown =	unix_shutdown,
732  	.setsockopt =	sock_no_setsockopt,
733  	.getsockopt =	sock_no_getsockopt,
734  	.sendmsg =	unix_seqpacket_sendmsg,
735  	.recvmsg =	unix_seqpacket_recvmsg,
736  	.mmap =		sock_no_mmap,
737  	.sendpage =	sock_no_sendpage,
738  	.set_peek_off =	unix_set_peek_off,
739  };
740  
741  static struct proto unix_proto = {
742  	.name			= "UNIX",
743  	.owner			= THIS_MODULE,
744  	.obj_size		= sizeof(struct unix_sock),
745  };
746  
747  /*
748   * AF_UNIX sockets do not interact with hardware, hence they
749   * don't trigger interrupts - so it's safe for them to have
750   * bh-unsafe locking for their sk_receive_queue.lock. Split off
751   * this special lock-class by reinitializing the spinlock key:
752   */
753  static struct lock_class_key af_unix_sk_receive_queue_lock_key;
754  
755  static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
756  {
757  	struct sock *sk = NULL;
758  	struct unix_sock *u;
759  
760  	atomic_long_inc(&unix_nr_socks);
761  	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
762  		goto out;
763  
764  	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
765  	if (!sk)
766  		goto out;
767  
768  	sock_init_data(sock, sk);
769  	lockdep_set_class(&sk->sk_receive_queue.lock,
770  				&af_unix_sk_receive_queue_lock_key);
771  
772  	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
773  	sk->sk_write_space	= unix_write_space;
774  	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
775  	sk->sk_destruct		= unix_sock_destructor;
776  	u	  = unix_sk(sk);
777  	u->path.dentry = NULL;
778  	u->path.mnt = NULL;
779  	spin_lock_init(&u->lock);
780  	atomic_long_set(&u->inflight, 0);
781  	INIT_LIST_HEAD(&u->link);
782  	mutex_init(&u->iolock); /* single task reading lock */
783  	mutex_init(&u->bindlock); /* single task binding lock */
784  	init_waitqueue_head(&u->peer_wait);
785  	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
786  	unix_insert_socket(unix_sockets_unbound(sk), sk);
787  out:
788  	if (sk == NULL)
789  		atomic_long_dec(&unix_nr_socks);
790  	else {
791  		local_bh_disable();
792  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
793  		local_bh_enable();
794  	}
795  	return sk;
796  }
797  
798  static int unix_create(struct net *net, struct socket *sock, int protocol,
799  		       int kern)
800  {
801  	if (protocol && protocol != PF_UNIX)
802  		return -EPROTONOSUPPORT;
803  
804  	sock->state = SS_UNCONNECTED;
805  
806  	switch (sock->type) {
807  	case SOCK_STREAM:
808  		sock->ops = &unix_stream_ops;
809  		break;
810  		/*
811  		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
812  		 *	nothing uses it.
813  		 */
814  	case SOCK_RAW:
815  		sock->type = SOCK_DGRAM;
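		/* fall through */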
816  	case SOCK_DGRAM:
817  		sock->ops = &unix_dgram_ops;
818  		break;
819  	case SOCK_SEQPACKET:
820  		sock->ops = &unix_seqpacket_ops;
821  		break;
822  	default:
823  		return -ESOCKTNOSUPPORT;
824  	}
825  
826  	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
827  }
828  
829  static int unix_release(struct socket *sock)
830  {
831  	struct sock *sk = sock->sk;
832  
833  	if (!sk)
834  		return 0;
835  
836  	unix_release_sock(sk, 0);
837  	sock->sk = NULL;
838  
839  	return 0;
840  }
841  
842  static int unix_autobind(struct socket *sock)
843  {
844  	struct sock *sk = sock->sk;
845  	struct net *net = sock_net(sk);
846  	struct unix_sock *u = unix_sk(sk);
847  	static u32 ordernum = 1;
848  	struct unix_address *addr;
849  	int err;
850  	unsigned int retries = 0;
851  
852  	err = mutex_lock_interruptible(&u->bindlock);
853  	if (err)
854  		return err;
855  
856  	err = 0;
857  	if (u->addr)
858  		goto out;
859  
860  	err = -ENOMEM;
861  	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
862  	if (!addr)
863  		goto out;
864  
865  	addr->name->sun_family = AF_UNIX;
866  	atomic_set(&addr->refcnt, 1);
867  
868  retry:
869  	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
870  	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
871  
872  	spin_lock(&unix_table_lock);
873  	ordernum = (ordernum+1)&0xFFFFF;
874  
875  	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
876  				      addr->hash)) {
877  		spin_unlock(&unix_table_lock);
878  		/*
879  		 * __unix_find_socket_byname() may take a long time if many names
880  		 * are already in use.
881  		 */
882  		cond_resched();
883  		/* Give up if all names seem to be in use. */
884  		if (retries++ == 0xFFFFF) {
885  			err = -ENOSPC;
886  			kfree(addr);
887  			goto out;
888  		}
889  		goto retry;
890  	}
891  	addr->hash ^= sk->sk_type;
892  
893  	__unix_remove_socket(sk);
894  	u->addr = addr;
895  	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
896  	spin_unlock(&unix_table_lock);
897  	err = 0;
898  
899  out:	mutex_unlock(&u->bindlock);
900  	return err;
901  }
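/*
 * Illustrative result: an autobound socket gets an abstract name of a
 * zero byte followed by five hex digits of ordernum (e.g. "0002a"),
 * with addr->len == sizeof(short) + 6.
 */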
902  
903  static struct sock *unix_find_other(struct net *net,
904  				    struct sockaddr_un *sunname, int len,
905  				    int type, unsigned int hash, int *error)
906  {
907  	struct sock *u;
908  	struct path path;
909  	int err = 0;
910  
911  	if (sunname->sun_path[0]) {
912  		struct inode *inode;
913  		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
914  		if (err)
915  			goto fail;
916  		inode = d_backing_inode(path.dentry);
917  		err = inode_permission(inode, MAY_WRITE);
918  		if (err)
919  			goto put_fail;
920  
921  		err = -ECONNREFUSED;
922  		if (!S_ISSOCK(inode->i_mode))
923  			goto put_fail;
924  		u = unix_find_socket_byinode(inode);
925  		if (!u)
926  			goto put_fail;
927  
928  		if (u->sk_type == type)
929  			touch_atime(&path);
930  
931  		path_put(&path);
932  
933  		err = -EPROTOTYPE;
934  		if (u->sk_type != type) {
935  			sock_put(u);
936  			goto fail;
937  		}
938  	} else {
939  		err = -ECONNREFUSED;
940  		u = unix_find_socket_byname(net, sunname, len, type, hash);
941  		if (u) {
942  			struct dentry *dentry;
943  			dentry = unix_sk(u)->path.dentry;
944  			if (dentry)
945  				touch_atime(&unix_sk(u)->path);
946  		} else
947  			goto fail;
948  	}
949  	return u;
950  
951  put_fail:
952  	path_put(&path);
953  fail:
954  	*error = err;
955  	return NULL;
956  }
957  
958  static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
959  {
960  	struct dentry *dentry;
961  	struct path path;
962  	int err = 0;
963  	/*
964  	 * Get the parent directory, calculate the hash for last
965  	 * component.
966  	 */
967  	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
968  	err = PTR_ERR(dentry);
969  	if (IS_ERR(dentry))
970  		return err;
971  
972  	/*
973  	 * All right, let's create it.
974  	 */
975  	err = security_path_mknod(&path, dentry, mode, 0);
976  	if (!err) {
977  		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
978  		if (!err) {
979  			res->mnt = mntget(path.mnt);
980  			res->dentry = dget(dentry);
981  		}
982  	}
983  	done_path_create(&path, dentry);
984  	return err;
985  }
986  
987  static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
988  {
989  	struct sock *sk = sock->sk;
990  	struct net *net = sock_net(sk);
991  	struct unix_sock *u = unix_sk(sk);
992  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
993  	char *sun_path = sunaddr->sun_path;
994  	int err;
995  	unsigned int hash;
996  	struct unix_address *addr;
997  	struct hlist_head *list;
998  	struct path path = { NULL, NULL };
999  
1000  	err = -EINVAL;
1001  	if (sunaddr->sun_family != AF_UNIX)
1002  		goto out;
1003  
1004  	if (addr_len == sizeof(short)) {
1005  		err = unix_autobind(sock);
1006  		goto out;
1007  	}
1008  
1009  	err = unix_mkname(sunaddr, addr_len, &hash);
1010  	if (err < 0)
1011  		goto out;
1012  	addr_len = err;
1013  
1014  	if (sun_path[0]) {
1015  		umode_t mode = S_IFSOCK |
1016  		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1017  		err = unix_mknod(sun_path, mode, &path);
1018  		if (err) {
1019  			if (err == -EEXIST)
1020  				err = -EADDRINUSE;
1021  			goto out;
1022  		}
1023  	}
1024  
1025  	err = mutex_lock_interruptible(&u->bindlock);
1026  	if (err)
1027  		goto out_put;
1028  
1029  	err = -EINVAL;
1030  	if (u->addr)
1031  		goto out_up;
1032  
1033  	err = -ENOMEM;
1034  	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1035  	if (!addr)
1036  		goto out_up;
1037  
1038  	memcpy(addr->name, sunaddr, addr_len);
1039  	addr->len = addr_len;
1040  	addr->hash = hash ^ sk->sk_type;
1041  	atomic_set(&addr->refcnt, 1);
1042  
1043  	if (sun_path[0]) {
1044  		addr->hash = UNIX_HASH_SIZE;
1045  		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1046  		spin_lock(&unix_table_lock);
1047  		u->path = path;
1048  		list = &unix_socket_table[hash];
1049  	} else {
1050  		spin_lock(&unix_table_lock);
1051  		err = -EADDRINUSE;
1052  		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1053  					      sk->sk_type, hash)) {
1054  			unix_release_addr(addr);
1055  			goto out_unlock;
1056  		}
1057  
1058  		list = &unix_socket_table[addr->hash];
1059  	}
1060  
1061  	err = 0;
1062  	__unix_remove_socket(sk);
1063  	u->addr = addr;
1064  	__unix_insert_socket(list, sk);
1065  
1066  out_unlock:
1067  	spin_unlock(&unix_table_lock);
1068  out_up:
1069  	mutex_unlock(&u->bindlock);
1070  out_put:
1071  	if (err)
1072  		path_put(&path);
1073  out:
1074  	return err;
1075  }
1076  
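/* Take both sockets' state locks in pointer order so concurrent
 * double-locks agree on a hierarchy and cannot ABBA-deadlock; a NULL
 * or identical sk2 degenerates to locking sk1 alone.
 */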
1077  static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1078  {
1079  	if (unlikely(sk1 == sk2) || !sk2) {
1080  		unix_state_lock(sk1);
1081  		return;
1082  	}
1083  	if (sk1 < sk2) {
1084  		unix_state_lock(sk1);
1085  		unix_state_lock_nested(sk2);
1086  	} else {
1087  		unix_state_lock(sk2);
1088  		unix_state_lock_nested(sk1);
1089  	}
1090  }
1091  
1092  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1093  {
1094  	if (unlikely(sk1 == sk2) || !sk2) {
1095  		unix_state_unlock(sk1);
1096  		return;
1097  	}
1098  	unix_state_unlock(sk1);
1099  	unix_state_unlock(sk2);
1100  }
1101  
1102  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1103  			      int alen, int flags)
1104  {
1105  	struct sock *sk = sock->sk;
1106  	struct net *net = sock_net(sk);
1107  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1108  	struct sock *other;
1109  	unsigned int hash;
1110  	int err;
1111  
1112  	if (addr->sa_family != AF_UNSPEC) {
1113  		err = unix_mkname(sunaddr, alen, &hash);
1114  		if (err < 0)
1115  			goto out;
1116  		alen = err;
1117  
1118  		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1119  		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1120  			goto out;
1121  
1122  restart:
1123  		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1124  		if (!other)
1125  			goto out;
1126  
1127  		unix_state_double_lock(sk, other);
1128  
1129  		/* Apparently VFS overslept socket death. Retry. */
1130  		if (sock_flag(other, SOCK_DEAD)) {
1131  			unix_state_double_unlock(sk, other);
1132  			sock_put(other);
1133  			goto restart;
1134  		}
1135  
1136  		err = -EPERM;
1137  		if (!unix_may_send(sk, other))
1138  			goto out_unlock;
1139  
1140  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1141  		if (err)
1142  			goto out_unlock;
1143  
1144  	} else {
1145  		/*
1146  		 *	1003.1g breaking connected state with AF_UNSPEC
1147  		 */
1148  		other = NULL;
1149  		unix_state_double_lock(sk, other);
1150  	}
1151  
1152  	/*
1153  	 * If it was connected, reconnect.
1154  	 */
1155  	if (unix_peer(sk)) {
1156  		struct sock *old_peer = unix_peer(sk);
1157  		unix_peer(sk) = other;
1158  		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1159  
1160  		unix_state_double_unlock(sk, other);
1161  
1162  		if (other != old_peer)
1163  			unix_dgram_disconnected(sk, old_peer);
1164  		sock_put(old_peer);
1165  	} else {
1166  		unix_peer(sk) = other;
1167  		unix_state_double_unlock(sk, other);
1168  	}
1169  	return 0;
1170  
1171  out_unlock:
1172  	unix_state_double_unlock(sk, other);
1173  	sock_put(other);
1174  out:
1175  	return err;
1176  }
1177  
1178  static long unix_wait_for_peer(struct sock *other, long timeo)
1179  {
1180  	struct unix_sock *u = unix_sk(other);
1181  	int sched;
1182  	DEFINE_WAIT(wait);
1183  
1184  	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1185  
1186  	sched = !sock_flag(other, SOCK_DEAD) &&
1187  		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1188  		unix_recvq_full(other);
1189  
1190  	unix_state_unlock(other);
1191  
1192  	if (sched)
1193  		timeo = schedule_timeout(timeo);
1194  
1195  	finish_wait(&u->peer_wait, &wait);
1196  	return timeo;
1197  }
1198  
1199  static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1200  			       int addr_len, int flags)
1201  {
1202  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1203  	struct sock *sk = sock->sk;
1204  	struct net *net = sock_net(sk);
1205  	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1206  	struct sock *newsk = NULL;
1207  	struct sock *other = NULL;
1208  	struct sk_buff *skb = NULL;
1209  	unsigned int hash;
1210  	int st;
1211  	int err;
1212  	long timeo;
1213  
1214  	err = unix_mkname(sunaddr, addr_len, &hash);
1215  	if (err < 0)
1216  		goto out;
1217  	addr_len = err;
1218  
1219  	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1220  	    (err = unix_autobind(sock)) != 0)
1221  		goto out;
1222  
1223  	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1224  
1225  	/* First of all allocate resources.
1226  	   If we do it after the state is locked,
1227  	   we will have to recheck everything again in any case.
1228  	 */
1229  
1230  	err = -ENOMEM;
1231  
1232  	/* create new sock for complete connection */
1233  	newsk = unix_create1(sock_net(sk), NULL, 0);
1234  	if (newsk == NULL)
1235  		goto out;
1236  
1237  	/* Allocate skb for sending to listening sock */
1238  	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1239  	if (skb == NULL)
1240  		goto out;
1241  
1242  restart:
1243  	/*  Find listening sock. */
1244  	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1245  	if (!other)
1246  		goto out;
1247  
1248  	/* Latch state of peer */
1249  	unix_state_lock(other);
1250  
1251  	/* Apparently VFS overslept socket death. Retry. */
1252  	if (sock_flag(other, SOCK_DEAD)) {
1253  		unix_state_unlock(other);
1254  		sock_put(other);
1255  		goto restart;
1256  	}
1257  
1258  	err = -ECONNREFUSED;
1259  	if (other->sk_state != TCP_LISTEN)
1260  		goto out_unlock;
1261  	if (other->sk_shutdown & RCV_SHUTDOWN)
1262  		goto out_unlock;
1263  
1264  	if (unix_recvq_full(other)) {
1265  		err = -EAGAIN;
1266  		if (!timeo)
1267  			goto out_unlock;
1268  
1269  		timeo = unix_wait_for_peer(other, timeo);
1270  
1271  		err = sock_intr_errno(timeo);
1272  		if (signal_pending(current))
1273  			goto out;
1274  		sock_put(other);
1275  		goto restart;
1276  	}
1277  
1278  	/* Latch our state.
1279  
1280  	   This is a tricky place. We need to grab our state lock and cannot
1281  	   drop the lock on the peer. It is dangerous because deadlock is
1282  	   possible. The connect-to-self case and simultaneous
1283  	   attempts to connect are eliminated by checking the socket
1284  	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1285  	   check this before attempting to grab the lock.
1286  
1287  	   Well, and we have to recheck the state after the socket is locked.
1288  	 */
1289  	st = sk->sk_state;
1290  
1291  	switch (st) {
1292  	case TCP_CLOSE:
1293  		/* This is ok... continue with connect */
1294  		break;
1295  	case TCP_ESTABLISHED:
1296  		/* Socket is already connected */
1297  		err = -EISCONN;
1298  		goto out_unlock;
1299  	default:
1300  		err = -EINVAL;
1301  		goto out_unlock;
1302  	}
1303  
1304  	unix_state_lock_nested(sk);
1305  
1306  	if (sk->sk_state != st) {
1307  		unix_state_unlock(sk);
1308  		unix_state_unlock(other);
1309  		sock_put(other);
1310  		goto restart;
1311  	}
1312  
1313  	err = security_unix_stream_connect(sk, other, newsk);
1314  	if (err) {
1315  		unix_state_unlock(sk);
1316  		goto out_unlock;
1317  	}
1318  
1319  	/* The way is open! Quickly set all the necessary fields... */
1320  
1321  	sock_hold(sk);
1322  	unix_peer(newsk)	= sk;
1323  	newsk->sk_state		= TCP_ESTABLISHED;
1324  	newsk->sk_type		= sk->sk_type;
1325  	init_peercred(newsk);
1326  	newu = unix_sk(newsk);
1327  	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1328  	otheru = unix_sk(other);
1329  
1330  	/* copy address information from listening to new sock*/
1331  	if (otheru->addr) {
1332  		atomic_inc(&otheru->addr->refcnt);
1333  		newu->addr = otheru->addr;
1334  	}
1335  	if (otheru->path.dentry) {
1336  		path_get(&otheru->path);
1337  		newu->path = otheru->path;
1338  	}
1339  
1340  	/* Set credentials */
1341  	copy_peercred(sk, other);
1342  
1343  	sock->state	= SS_CONNECTED;
1344  	sk->sk_state	= TCP_ESTABLISHED;
1345  	sock_hold(newsk);
1346  
1347  	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1348  	unix_peer(sk)	= newsk;
1349  
1350  	unix_state_unlock(sk);
1351  
1352  	/* take it and send info to the listening sock */
1353  	spin_lock(&other->sk_receive_queue.lock);
1354  	__skb_queue_tail(&other->sk_receive_queue, skb);
1355  	spin_unlock(&other->sk_receive_queue.lock);
1356  	unix_state_unlock(other);
1357  	other->sk_data_ready(other);
1358  	sock_put(other);
1359  	return 0;
1360  
1361  out_unlock:
1362  	if (other)
1363  		unix_state_unlock(other);
1364  
1365  out:
1366  	kfree_skb(skb);
1367  	if (newsk)
1368  		unix_release_sock(newsk, 0);
1369  	if (other)
1370  		sock_put(other);
1371  	return err;
1372  }
1373  
1374  static int unix_socketpair(struct socket *socka, struct socket *sockb)
1375  {
1376  	struct sock *ska = socka->sk, *skb = sockb->sk;
1377  
1378  	/* Join our sockets back to back */
1379  	sock_hold(ska);
1380  	sock_hold(skb);
1381  	unix_peer(ska) = skb;
1382  	unix_peer(skb) = ska;
1383  	init_peercred(ska);
1384  	init_peercred(skb);
1385  
1386  	if (ska->sk_type != SOCK_DGRAM) {
1387  		ska->sk_state = TCP_ESTABLISHED;
1388  		skb->sk_state = TCP_ESTABLISHED;
1389  		socka->state  = SS_CONNECTED;
1390  		sockb->state  = SS_CONNECTED;
1391  	}
1392  	return 0;
1393  }
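/*
 * Userspace counterpart, for illustration:
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *
 * sv[0] and sv[1] are now mutual peers, and SO_PEERCRED on either one
 * reports the creating process (see init_peercred() above).
 */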
1394  
1395  static void unix_sock_inherit_flags(const struct socket *old,
1396  				    struct socket *new)
1397  {
1398  	if (test_bit(SOCK_PASSCRED, &old->flags))
1399  		set_bit(SOCK_PASSCRED, &new->flags);
1400  	if (test_bit(SOCK_PASSSEC, &old->flags))
1401  		set_bit(SOCK_PASSSEC, &new->flags);
1402  }
1403  
1404  static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1405  {
1406  	struct sock *sk = sock->sk;
1407  	struct sock *tsk;
1408  	struct sk_buff *skb;
1409  	int err;
1410  
1411  	err = -EOPNOTSUPP;
1412  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1413  		goto out;
1414  
1415  	err = -EINVAL;
1416  	if (sk->sk_state != TCP_LISTEN)
1417  		goto out;
1418  
1419  	/* If socket state is TCP_LISTEN it cannot change (for now...),
1420  	 * so no locks are necessary.
1421  	 */
1422  
1423  	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1424  	if (!skb) {
1425  		/* This means receive shutdown. */
1426  		if (err == 0)
1427  			err = -EINVAL;
1428  		goto out;
1429  	}
1430  
1431  	tsk = skb->sk;
1432  	skb_free_datagram(sk, skb);
1433  	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1434  
1435  	/* attach accepted sock to socket */
1436  	unix_state_lock(tsk);
1437  	newsock->state = SS_CONNECTED;
1438  	unix_sock_inherit_flags(sock, newsock);
1439  	sock_graft(tsk, newsock);
1440  	unix_state_unlock(tsk);
1441  	return 0;
1442  
1443  out:
1444  	return err;
1445  }
1446  
1447  
1448  static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1449  {
1450  	struct sock *sk = sock->sk;
1451  	struct unix_sock *u;
1452  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1453  	int err = 0;
1454  
1455  	if (peer) {
1456  		sk = unix_peer_get(sk);
1457  
1458  		err = -ENOTCONN;
1459  		if (!sk)
1460  			goto out;
1461  		err = 0;
1462  	} else {
1463  		sock_hold(sk);
1464  	}
1465  
1466  	u = unix_sk(sk);
1467  	unix_state_lock(sk);
1468  	if (!u->addr) {
1469  		sunaddr->sun_family = AF_UNIX;
1470  		sunaddr->sun_path[0] = 0;
1471  		*uaddr_len = sizeof(short);
1472  	} else {
1473  		struct unix_address *addr = u->addr;
1474  
1475  		*uaddr_len = addr->len;
1476  		memcpy(sunaddr, addr->name, *uaddr_len);
1477  	}
1478  	unix_state_unlock(sk);
1479  	sock_put(sk);
1480  out:
1481  	return err;
1482  }
1483  
1484  static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1485  {
1486  	int i;
1487  
1488  	scm->fp = UNIXCB(skb).fp;
1489  	UNIXCB(skb).fp = NULL;
1490  
1491  	for (i = scm->fp->count-1; i >= 0; i--)
1492  		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1493  }
1494  
1495  static void unix_destruct_scm(struct sk_buff *skb)
1496  {
1497  	struct scm_cookie scm;
1498  	memset(&scm, 0, sizeof(scm));
1499  	scm.pid  = UNIXCB(skb).pid;
1500  	if (UNIXCB(skb).fp)
1501  		unix_detach_fds(&scm, skb);
1502  
1503  	/* Alas, it calls VFS */
1504  	/* So fscking what? fput() had been SMP-safe since the last Summer */
1505  	scm_destroy(&scm);
1506  	sock_wfree(skb);
1507  }
1508  
1509  /*
1510   * The "user->unix_inflight" variable is protected by the garbage
1511   * collection lock, and we just read it locklessly here. If you go
1512   * over the limit, there might be a tiny race in actually noticing
1513   * it across threads. Tough.
1514   */
1515  static inline bool too_many_unix_fds(struct task_struct *p)
1516  {
1517  	struct user_struct *user = current_user();
1518  
1519  	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1520  		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1521  	return false;
1522  }
1523  
1524  #define MAX_RECURSION_LEVEL 4
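/* Bounds how deeply AF_UNIX sockets may be nested via SCM_RIGHTS
 * (a socket passed over a socket passed over a socket ...).
 */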
1525  
1526  static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1527  {
1528  	int i;
1529  	unsigned char max_level = 0;
1530  
1531  	if (too_many_unix_fds(current))
1532  		return -ETOOMANYREFS;
1533  
1534  	for (i = scm->fp->count - 1; i >= 0; i--) {
1535  		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1536  
1537  		if (sk)
1538  			max_level = max(max_level,
1539  					unix_sk(sk)->recursion_level);
1540  	}
1541  	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1542  		return -ETOOMANYREFS;
1543  
1544  	/*
1545  	 * Need to duplicate file references for the sake of garbage
1546  	 * collection.  Otherwise a socket in the fps might become a
1547  	 * candidate for GC while the skb is not yet queued.
1548  	 */
1549  	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1550  	if (!UNIXCB(skb).fp)
1551  		return -ENOMEM;
1552  
1553  	for (i = scm->fp->count - 1; i >= 0; i--)
1554  		unix_inflight(scm->fp->user, scm->fp->fp[i]);
1555  	return max_level;
1556  }
1557  
1558  static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1559  {
1560  	int err = 0;
1561  
1562  	UNIXCB(skb).pid  = get_pid(scm->pid);
1563  	UNIXCB(skb).uid = scm->creds.uid;
1564  	UNIXCB(skb).gid = scm->creds.gid;
1565  	UNIXCB(skb).fp = NULL;
1566  	unix_get_secdata(scm, skb);
1567  	if (scm->fp && send_fds)
1568  		err = unix_attach_fds(scm, skb);
1569  
1570  	skb->destructor = unix_destruct_scm;
1571  	return err;
1572  }
1573  
1574  static bool unix_passcred_enabled(const struct socket *sock,
1575  				  const struct sock *other)
1576  {
1577  	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1578  	       !other->sk_socket ||
1579  	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1580  }
1581  
1582  /*
1583   * Some apps rely on write() giving SCM_CREDENTIALS
1584   * We include credentials if source or destination socket
1585   * asserted SOCK_PASSCRED.
1586   */
1587  static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1588  			    const struct sock *other)
1589  {
1590  	if (UNIXCB(skb).pid)
1591  		return;
1592  	if (unix_passcred_enabled(sock, other)) {
1593  		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1594  		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1595  	}
1596  }
1597  
1598  static int maybe_init_creds(struct scm_cookie *scm,
1599  			    struct socket *socket,
1600  			    const struct sock *other)
1601  {
1602  	int err;
1603  	struct msghdr msg = { .msg_controllen = 0 };
1604  
1605  	err = scm_send(socket, &msg, scm, false);
1606  	if (err)
1607  		return err;
1608  
1609  	if (unix_passcred_enabled(socket, other)) {
1610  		scm->pid = get_pid(task_tgid(current));
1611  		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1612  	}
1613  	return err;
1614  }
1615  
1616  static bool unix_skb_scm_eq(struct sk_buff *skb,
1617  			    struct scm_cookie *scm)
1618  {
1619  	const struct unix_skb_parms *u = &UNIXCB(skb);
1620  
1621  	return u->pid == scm->pid &&
1622  	       uid_eq(u->uid, scm->creds.uid) &&
1623  	       gid_eq(u->gid, scm->creds.gid) &&
1624  	       unix_secdata_eq(scm, skb);
1625  }
1626  
1627  /*
1628   *	Send AF_UNIX data.
1629   */
1630  
1631  static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1632  			      size_t len)
1633  {
1634  	struct sock *sk = sock->sk;
1635  	struct net *net = sock_net(sk);
1636  	struct unix_sock *u = unix_sk(sk);
1637  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1638  	struct sock *other = NULL;
1639  	int namelen = 0; /* fake initialization to quiet GCC */
1640  	int err;
1641  	unsigned int hash;
1642  	struct sk_buff *skb;
1643  	long timeo;
1644  	struct scm_cookie scm;
1645  	int max_level;
1646  	int data_len = 0;
1647  	int sk_locked;
1648  
1649  	wait_for_unix_gc();
1650  	err = scm_send(sock, msg, &scm, false);
1651  	if (err < 0)
1652  		return err;
1653  
1654  	err = -EOPNOTSUPP;
1655  	if (msg->msg_flags&MSG_OOB)
1656  		goto out;
1657  
1658  	if (msg->msg_namelen) {
1659  		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1660  		if (err < 0)
1661  			goto out;
1662  		namelen = err;
1663  	} else {
1664  		sunaddr = NULL;
1665  		err = -ENOTCONN;
1666  		other = unix_peer_get(sk);
1667  		if (!other)
1668  			goto out;
1669  	}
1670  
1671  	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1672  	    && (err = unix_autobind(sock)) != 0)
1673  		goto out;
1674  
1675  	err = -EMSGSIZE;
1676  	if (len > sk->sk_sndbuf - 32)
1677  		goto out;
1678  
1679  	if (len > SKB_MAX_ALLOC) {
1680  		data_len = min_t(size_t,
1681  				 len - SKB_MAX_ALLOC,
1682  				 MAX_SKB_FRAGS * PAGE_SIZE);
1683  		data_len = PAGE_ALIGN(data_len);
1684  
1685  		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1686  	}
1687  
1688  	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1689  				   msg->msg_flags & MSG_DONTWAIT, &err,
1690  				   PAGE_ALLOC_COSTLY_ORDER);
1691  	if (skb == NULL)
1692  		goto out;
1693  
1694  	err = unix_scm_to_skb(&scm, skb, true);
1695  	if (err < 0)
1696  		goto out_free;
1697  	max_level = err + 1;
1698  
1699  	skb_put(skb, len - data_len);
1700  	skb->data_len = data_len;
1701  	skb->len = len;
1702  	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1703  	if (err)
1704  		goto out_free;
1705  
1706  	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1707  
1708  restart:
1709  	if (!other) {
1710  		err = -ECONNRESET;
1711  		if (sunaddr == NULL)
1712  			goto out_free;
1713  
1714  		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1715  					hash, &err);
1716  		if (other == NULL)
1717  			goto out_free;
1718  	}
1719  
1720  	if (sk_filter(other, skb) < 0) {
1721  		/* Toss the packet but do not return any error to the sender */
1722  		err = len;
1723  		goto out_free;
1724  	}
1725  
1726  	sk_locked = 0;
1727  	unix_state_lock(other);
1728  restart_locked:
1729  	err = -EPERM;
1730  	if (!unix_may_send(sk, other))
1731  		goto out_unlock;
1732  
1733  	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1734  		/*
1735  		 *	Check with 1003.1g - what should
1736  		 *	datagram error
1737  		 */
1738  		unix_state_unlock(other);
1739  		sock_put(other);
1740  
1741  		if (!sk_locked)
1742  			unix_state_lock(sk);
1743  
1744  		err = 0;
1745  		if (unix_peer(sk) == other) {
1746  			unix_peer(sk) = NULL;
1747  			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1748  
1749  			unix_state_unlock(sk);
1750  
1751  			unix_dgram_disconnected(sk, other);
1752  			sock_put(other);
1753  			err = -ECONNREFUSED;
1754  		} else {
1755  			unix_state_unlock(sk);
1756  		}
1757  
1758  		other = NULL;
1759  		if (err)
1760  			goto out_free;
1761  		goto restart;
1762  	}
1763  
1764  	err = -EPIPE;
1765  	if (other->sk_shutdown & RCV_SHUTDOWN)
1766  		goto out_unlock;
1767  
1768  	if (sk->sk_type != SOCK_SEQPACKET) {
1769  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1770  		if (err)
1771  			goto out_unlock;
1772  	}
1773  
1774  	/* other == sk && unix_peer(other) != sk if
1775  	 * - unix_peer(sk) == NULL, destination address bound to sk
1776  	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1777  	 */
1778  	if (other != sk &&
1779  	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1780  		if (timeo) {
1781  			timeo = unix_wait_for_peer(other, timeo);
1782  
1783  			err = sock_intr_errno(timeo);
1784  			if (signal_pending(current))
1785  				goto out_free;
1786  
1787  			goto restart;
1788  		}
1789  
1790  		if (!sk_locked) {
1791  			unix_state_unlock(other);
1792  			unix_state_double_lock(sk, other);
1793  		}
1794  
1795  		if (unix_peer(sk) != other ||
1796  		    unix_dgram_peer_wake_me(sk, other)) {
1797  			err = -EAGAIN;
1798  			sk_locked = 1;
1799  			goto out_unlock;
1800  		}
1801  
1802  		if (!sk_locked) {
1803  			sk_locked = 1;
1804  			goto restart_locked;
1805  		}
1806  	}
1807  
1808  	if (unlikely(sk_locked))
1809  		unix_state_unlock(sk);
1810  
1811  	if (sock_flag(other, SOCK_RCVTSTAMP))
1812  		__net_timestamp(skb);
1813  	maybe_add_creds(skb, sock, other);
1814  	skb_queue_tail(&other->sk_receive_queue, skb);
1815  	if (max_level > unix_sk(other)->recursion_level)
1816  		unix_sk(other)->recursion_level = max_level;
1817  	unix_state_unlock(other);
1818  	other->sk_data_ready(other);
1819  	sock_put(other);
1820  	scm_destroy(&scm);
1821  	return len;
1822  
1823  out_unlock:
1824  	if (sk_locked)
1825  		unix_state_unlock(sk);
1826  	unix_state_unlock(other);
1827  out_free:
1828  	kfree_skb(skb);
1829  out:
1830  	if (other)
1831  		sock_put(other);
1832  	scm_destroy(&scm);
1833  	return err;
1834  }
1835  
1836  /* We use paged skbs for stream sockets, and limit occupancy to 32768
1837   * bytes, and a minimum of a full page.
1838   */
1839  #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
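/* Worked out for 4 KiB pages (illustrative): get_order(32768) == 3,
 * so UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768 bytes; with larger pages
 * it rounds up to one full page.
 */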
1840  
1841  static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1842  			       size_t len)
1843  {
1844  	struct sock *sk = sock->sk;
1845  	struct sock *other = NULL;
1846  	int err, size;
1847  	struct sk_buff *skb;
1848  	int sent = 0;
1849  	struct scm_cookie scm;
1850  	bool fds_sent = false;
1851  	int max_level;
1852  	int data_len;
1853  
1854  	wait_for_unix_gc();
1855  	err = scm_send(sock, msg, &scm, false);
1856  	if (err < 0)
1857  		return err;
1858  
1859  	err = -EOPNOTSUPP;
1860  	if (msg->msg_flags&MSG_OOB)
1861  		goto out_err;
1862  
1863  	if (msg->msg_namelen) {
1864  		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1865  		goto out_err;
1866  	} else {
1867  		err = -ENOTCONN;
1868  		other = unix_peer(sk);
1869  		if (!other)
1870  			goto out_err;
1871  	}
1872  
1873  	if (sk->sk_shutdown & SEND_SHUTDOWN)
1874  		goto pipe_err;
1875  
1876  	while (sent < len) {
1877  		size = len - sent;
1878  
1879  		/* Keep two messages in the pipe so it schedules better */
1880  		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1881  
1882  		/* allow fallback to order-0 allocations */
1883  		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1884  
1885  		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1886  
1887  		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1888  
1889  		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1890  					   msg->msg_flags & MSG_DONTWAIT, &err,
1891  					   get_order(UNIX_SKB_FRAGS_SZ));
1892  		if (!skb)
1893  			goto out_err;
1894  
1895  		/* Only send the fds in the first buffer */
1896  		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1897  		if (err < 0) {
1898  			kfree_skb(skb);
1899  			goto out_err;
1900  		}
1901  		max_level = err + 1;
1902  		fds_sent = true;
1903  
1904  		skb_put(skb, size - data_len);
1905  		skb->data_len = data_len;
1906  		skb->len = size;
1907  		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1908  		if (err) {
1909  			kfree_skb(skb);
1910  			goto out_err;
1911  		}
1912  
1913  		unix_state_lock(other);
1914  
1915  		if (sock_flag(other, SOCK_DEAD) ||
1916  		    (other->sk_shutdown & RCV_SHUTDOWN))
1917  			goto pipe_err_free;
1918  
1919  		maybe_add_creds(skb, sock, other);
1920  		skb_queue_tail(&other->sk_receive_queue, skb);
1921  		if (max_level > unix_sk(other)->recursion_level)
1922  			unix_sk(other)->recursion_level = max_level;
1923  		unix_state_unlock(other);
1924  		other->sk_data_ready(other);
1925  		sent += size;
1926  	}
1927  
1928  	scm_destroy(&scm);
1929  
1930  	return sent;
1931  
1932  pipe_err_free:
1933  	unix_state_unlock(other);
1934  	kfree_skb(skb);
1935  pipe_err:
1936  	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1937  		send_sig(SIGPIPE, current, 0);
1938  	err = -EPIPE;
1939  out_err:
1940  	scm_destroy(&scm);
1941  	return sent ? : err;
1942  }
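/* Illustrative userspace sketch (editor's addition, not part of this file):
 * MSG_NOSIGNAL suppresses the SIGPIPE raised at pipe_err above, so writing
 * to a peer that shut down reading yields a plain EPIPE return instead.
 * handle_closed_peer() is a hypothetical helper.
 *
 *	ssize_t n = send(fd, buf, len, MSG_NOSIGNAL);
 *	if (n < 0 && errno == EPIPE)
 *		handle_closed_peer();
 */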
1943  
1944  static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1945  				    int offset, size_t size, int flags)
1946  {
1947  	int err;
1948  	bool send_sigpipe = false;
1949  	bool init_scm = true;
1950  	struct scm_cookie scm;
1951  	struct sock *other, *sk = socket->sk;
1952  	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1953  
1954  	if (flags & MSG_OOB)
1955  		return -EOPNOTSUPP;
1956  
1957  	other = unix_peer(sk);
1958  	if (!other || sk->sk_state != TCP_ESTABLISHED)
1959  		return -ENOTCONN;
1960  
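	/* Editor's note: the "if (false)" block below is never entered
	 * from the top; it only hosts the alloc_skb label so that later
	 * "goto alloc_skb" jumps drop both locks, allocate a fresh skb,
	 * and fall through to retake iolock and retry.
	 */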
1961  	if (false) {
1962  alloc_skb:
1963  		unix_state_unlock(other);
1964  		mutex_unlock(&unix_sk(other)->iolock);
1965  		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1966  					      &err, 0);
1967  		if (!newskb)
1968  			goto err;
1969  	}
1970  
1971  	/* we must acquire iolock because we modify skbs already present
1972  	 * in the sk_receive_queue and adjust skb->len
1973  	 */
1974  	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1975  	if (err) {
1976  		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1977  		goto err;
1978  	}
1979  
1980  	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1981  		err = -EPIPE;
1982  		send_sigpipe = true;
1983  		goto err_unlock;
1984  	}
1985  
1986  	unix_state_lock(other);
1987  
1988  	if (sock_flag(other, SOCK_DEAD) ||
1989  	    other->sk_shutdown & RCV_SHUTDOWN) {
1990  		err = -EPIPE;
1991  		send_sigpipe = true;
1992  		goto err_state_unlock;
1993  	}
1994  
1995  	if (init_scm) {
1996  		err = maybe_init_creds(&scm, socket, other);
1997  		if (err)
1998  			goto err_state_unlock;
1999  		init_scm = false;
2000  	}
2001  
2002  	skb = skb_peek_tail(&other->sk_receive_queue);
2003  	if (tail && tail == skb) {
2004  		skb = newskb;
2005  	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2006  		if (newskb) {
2007  			skb = newskb;
2008  		} else {
2009  			tail = skb;
2010  			goto alloc_skb;
2011  		}
2012  	} else if (newskb) {
2013  		/* this is the fast path; consume_skb() tolerates a NULL
2014  		 * argument, so even with newskb == NULL the call would
2015  		 * do no harm
2016  		 */
2017  		consume_skb(newskb);
2018  		newskb = NULL;
2019  	}
2020  
2021  	if (skb_append_pagefrags(skb, page, offset, size)) {
2022  		tail = skb;
2023  		goto alloc_skb;
2024  	}
2025  
2026  	skb->len += size;
2027  	skb->data_len += size;
2028  	skb->truesize += size;
2029  	atomic_add(size, &sk->sk_wmem_alloc);
2030  
2031  	if (newskb) {
2032  		err = unix_scm_to_skb(&scm, skb, false);
2033  		if (err)
2034  			goto err_state_unlock;
2035  		spin_lock(&other->sk_receive_queue.lock);
2036  		__skb_queue_tail(&other->sk_receive_queue, newskb);
2037  		spin_unlock(&other->sk_receive_queue.lock);
2038  	}
2039  
2040  	unix_state_unlock(other);
2041  	mutex_unlock(&unix_sk(other)->iolock);
2042  
2043  	other->sk_data_ready(other);
2044  	scm_destroy(&scm);
2045  	return size;
2046  
2047  err_state_unlock:
2048  	unix_state_unlock(other);
2049  err_unlock:
2050  	mutex_unlock(&unix_sk(other)->iolock);
2051  err:
2052  	kfree_skb(newskb);
2053  	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2054  		send_sig(SIGPIPE, current, 0);
2055  	if (!init_scm)
2056  		scm_destroy(&scm);
2057  	return err;
2058  }
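/* Illustrative userspace sketch (editor's addition, not part of this file):
 * this sendpage path is what zero-copy transfers such as sendfile(2) into a
 * connected AF_UNIX stream socket can end up using.  The file name is
 * hypothetical.
 *
 *	int in = open("/tmp/payload", O_RDONLY);
 *	off_t off = 0;
 *	ssize_t n = sendfile(sockfd, in, &off, 65536);
 */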
2059  
2060  static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2061  				  size_t len)
2062  {
2063  	int err;
2064  	struct sock *sk = sock->sk;
2065  
2066  	err = sock_error(sk);
2067  	if (err)
2068  		return err;
2069  
2070  	if (sk->sk_state != TCP_ESTABLISHED)
2071  		return -ENOTCONN;
2072  
2073  	if (msg->msg_namelen)
2074  		msg->msg_namelen = 0;
2075  
2076  	return unix_dgram_sendmsg(sock, msg, len);
2077  }
2078  
2079  static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2080  				  size_t size, int flags)
2081  {
2082  	struct sock *sk = sock->sk;
2083  
2084  	if (sk->sk_state != TCP_ESTABLISHED)
2085  		return -ENOTCONN;
2086  
2087  	return unix_dgram_recvmsg(sock, msg, size, flags);
2088  }
2089  
2090  static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2091  {
2092  	struct unix_sock *u = unix_sk(sk);
2093  
2094  	if (u->addr) {
2095  		msg->msg_namelen = u->addr->len;
2096  		memcpy(msg->msg_name, u->addr->name, u->addr->len);
2097  	}
2098  }
2099  
2100  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2101  			      size_t size, int flags)
2102  {
2103  	struct scm_cookie scm;
2104  	struct sock *sk = sock->sk;
2105  	struct unix_sock *u = unix_sk(sk);
2106  	struct sk_buff *skb, *last;
2107  	long timeo;
2108  	int err;
2109  	int peeked, skip;
2110  
2111  	err = -EOPNOTSUPP;
2112  	if (flags&MSG_OOB)
2113  		goto out;
2114  
2115  	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2116  
2117  	do {
2118  		mutex_lock(&u->iolock);
2119  
2120  		skip = sk_peek_offset(sk, flags);
2121  		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2122  					      &err, &last);
2123  		if (skb)
2124  			break;
2125  
2126  		mutex_unlock(&u->iolock);
2127  
2128  		if (err != -EAGAIN)
2129  			break;
2130  	} while (timeo &&
2131  		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2132  
2133  	if (!skb) { /* implies iolock unlocked */
2134  		unix_state_lock(sk);
2135  		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2136  		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2137  		    (sk->sk_shutdown & RCV_SHUTDOWN))
2138  			err = 0;
2139  		unix_state_unlock(sk);
2140  		goto out;
2141  	}
2142  
2143  	if (wq_has_sleeper(&u->peer_wait))
2144  		wake_up_interruptible_sync_poll(&u->peer_wait,
2145  						POLLOUT | POLLWRNORM |
2146  						POLLWRBAND);
2147  
2148  	if (msg->msg_name)
2149  		unix_copy_addr(msg, skb->sk);
2150  
2151  	if (size > skb->len - skip)
2152  		size = skb->len - skip;
2153  	else if (size < skb->len - skip)
2154  		msg->msg_flags |= MSG_TRUNC;
2155  
2156  	err = skb_copy_datagram_msg(skb, skip, msg, size);
2157  	if (err)
2158  		goto out_free;
2159  
2160  	if (sock_flag(sk, SOCK_RCVTSTAMP))
2161  		__sock_recv_timestamp(msg, sk, skb);
2162  
2163  	memset(&scm, 0, sizeof(scm));
2164  
2165  	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2166  	unix_set_secdata(&scm, skb);
2167  
2168  	if (!(flags & MSG_PEEK)) {
2169  		if (UNIXCB(skb).fp)
2170  			unix_detach_fds(&scm, skb);
2171  
2172  		sk_peek_offset_bwd(sk, skb->len);
2173  	} else {
2174  		/* It is questionable what to do on PEEK; we could:
2175  		   - not return fds - good, but too simple 8)
2176  		   - return fds, and not return them again on read (the old
2177  		     strategy, apparently wrong)
2178  		   - clone fds (chosen for now as the most universal
2179  		     solution)
2180  
2181  		   POSIX 1003.1g does not actually define this clearly
2182  		   at all - but then POSIX 1003.1g doesn't define a lot
2183  		   of things clearly!
2184  
2185  		*/
2186  
2187  		sk_peek_offset_fwd(sk, size);
2188  
2189  		if (UNIXCB(skb).fp)
2190  			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2191  	}
2192  	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2193  
2194  	scm_recv(sock, msg, &scm, flags);
2195  
2196  out_free:
2197  	skb_free_datagram(sk, skb);
2198  	mutex_unlock(&u->iolock);
2199  out:
2200  	return err;
2201  }
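/* Illustrative userspace sketch (editor's addition, not part of this file):
 * the MSG_TRUNC handling above lets a receiver learn a datagram's full size
 * before consuming it.
 *
 *	char c;
 *	ssize_t full = recv(fd, &c, 1, MSG_PEEK | MSG_TRUNC);
 *	then allocate "full" bytes and recv() again without MSG_PEEK
 */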
2202  
2203  /*
2204   *	Sleep until more data has arrived, but check for races.
2205   */
2206  static long unix_stream_data_wait(struct sock *sk, long timeo,
2207  				  struct sk_buff *last, unsigned int last_len,
2208  				  bool freezable)
2209  {
2210  	struct sk_buff *tail;
2211  	DEFINE_WAIT(wait);
2212  
2213  	unix_state_lock(sk);
2214  
2215  	for (;;) {
2216  		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2217  
2218  		tail = skb_peek_tail(&sk->sk_receive_queue);
2219  		if (tail != last ||
2220  		    (tail && tail->len != last_len) ||
2221  		    sk->sk_err ||
2222  		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2223  		    signal_pending(current) ||
2224  		    !timeo)
2225  			break;
2226  
2227  		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2228  		unix_state_unlock(sk);
2229  		if (freezable)
2230  			timeo = freezable_schedule_timeout(timeo);
2231  		else
2232  			timeo = schedule_timeout(timeo);
2233  		unix_state_lock(sk);
2234  
2235  		if (sock_flag(sk, SOCK_DEAD))
2236  			break;
2237  
2238  		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2239  	}
2240  
2241  	finish_wait(sk_sleep(sk), &wait);
2242  	unix_state_unlock(sk);
2243  	return timeo;
2244  }
2245  
2246  static unsigned int unix_skb_len(const struct sk_buff *skb)
2247  {
2248  	return skb->len - UNIXCB(skb).consumed;
2249  }
2250  
2251  struct unix_stream_read_state {
2252  	int (*recv_actor)(struct sk_buff *, int, int,
2253  			  struct unix_stream_read_state *);
2254  	struct socket *socket;
2255  	struct msghdr *msg;
2256  	struct pipe_inode_info *pipe;
2257  	size_t size;
2258  	int flags;
2259  	unsigned int splice_flags;
2260  };
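/* Editor's note: unix_stream_read_generic() below walks the receive queue
 * and delegates the actual data transfer to recv_actor, so recvmsg() (copy
 * into the user iov) and splice() (feed a pipe) differ only in the actor
 * they plug in; both actors are defined further down.
 */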
2261  
2262  static int unix_stream_read_generic(struct unix_stream_read_state *state,
2263  				    bool freezable)
2264  {
2265  	struct scm_cookie scm;
2266  	struct socket *sock = state->socket;
2267  	struct sock *sk = sock->sk;
2268  	struct unix_sock *u = unix_sk(sk);
2269  	int copied = 0;
2270  	int flags = state->flags;
2271  	int noblock = flags & MSG_DONTWAIT;
2272  	bool check_creds = false;
2273  	int target;
2274  	int err = 0;
2275  	long timeo;
2276  	int skip;
2277  	size_t size = state->size;
2278  	unsigned int last_len;
2279  
2280  	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2281  		err = -EINVAL;
2282  		goto out;
2283  	}
2284  
2285  	if (unlikely(flags & MSG_OOB)) {
2286  		err = -EOPNOTSUPP;
2287  		goto out;
2288  	}
2289  
2290  	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2291  	timeo = sock_rcvtimeo(sk, noblock);
2292  
2293  	memset(&scm, 0, sizeof(scm));
2294  
2295  	/* Lock the socket to prevent the receive queue from being
2296  	 * reordered while we sleep copying data into the message
2297  	 */
2298  	mutex_lock(&u->iolock);
2299  
2300  	if (flags & MSG_PEEK)
2301  		skip = sk_peek_offset(sk, flags);
2302  	else
2303  		skip = 0;
2304  
2305  	do {
2306  		int chunk;
2307  		bool drop_skb;
2308  		struct sk_buff *skb, *last;
2309  
2310  redo:
2311  		unix_state_lock(sk);
2312  		if (sock_flag(sk, SOCK_DEAD)) {
2313  			err = -ECONNRESET;
2314  			goto unlock;
2315  		}
2316  		last = skb = skb_peek(&sk->sk_receive_queue);
2317  		last_len = last ? last->len : 0;
2318  again:
2319  		if (skb == NULL) {
2320  			unix_sk(sk)->recursion_level = 0;
2321  			if (copied >= target)
2322  				goto unlock;
2323  
2324  			/*
2325  			 *	POSIX 1003.1g mandates this order.
2326  			 */
2327  
2328  			err = sock_error(sk);
2329  			if (err)
2330  				goto unlock;
2331  			if (sk->sk_shutdown & RCV_SHUTDOWN)
2332  				goto unlock;
2333  
2334  			unix_state_unlock(sk);
2335  			if (!timeo) {
2336  				err = -EAGAIN;
2337  				break;
2338  			}
2339  
2340  			mutex_unlock(&u->iolock);
2341  
2342  			timeo = unix_stream_data_wait(sk, timeo, last,
2343  						      last_len, freezable);
2344  
2345  			if (signal_pending(current)) {
2346  				err = sock_intr_errno(timeo);
2347  				scm_destroy(&scm);
2348  				goto out;
2349  			}
2350  
2351  			mutex_lock(&u->iolock);
2352  			goto redo;
2353  unlock:
2354  			unix_state_unlock(sk);
2355  			break;
2356  		}
2357  
2358  		while (skip >= unix_skb_len(skb)) {
2359  			skip -= unix_skb_len(skb);
2360  			last = skb;
2361  			last_len = skb->len;
2362  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2363  			if (!skb)
2364  				goto again;
2365  		}
2366  
2367  		unix_state_unlock(sk);
2368  
2369  		if (check_creds) {
2370  			/* Never glue messages from different writers */
2371  			if (!unix_skb_scm_eq(skb, &scm))
2372  				break;
2373  		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2374  			/* Copy credentials */
2375  			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2376  			unix_set_secdata(&scm, skb);
2377  			check_creds = true;
2378  		}
2379  
2380  		/* Copy address just once */
2381  		if (state->msg && state->msg->msg_name) {
2382  			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2383  					 state->msg->msg_name);
2384  			unix_copy_addr(state->msg, skb->sk);
2385  			sunaddr = NULL;
2386  		}
2387  
2388  		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2389  		skb_get(skb);
2390  		chunk = state->recv_actor(skb, skip, chunk, state);
2391  		drop_skb = !unix_skb_len(skb);
2392  		/* skb is only safe to use if !drop_skb */
2393  		consume_skb(skb);
2394  		if (chunk < 0) {
2395  			if (copied == 0)
2396  				copied = -EFAULT;
2397  			break;
2398  		}
2399  		copied += chunk;
2400  		size -= chunk;
2401  
2402  		if (drop_skb) {
2403  			/* the skb was touched by a concurrent reader;
2404  			 * we must not expect anything further from it
2405  			 * and must treat it as invalid - we can be
2406  			 * sure it was dropped from the socket queue
2407  			 *
2408  			 * so report a short read
2409  			 */
2410  			err = 0;
2411  			break;
2412  		}
2413  
2414  		/* Mark read part of skb as used */
2415  		if (!(flags & MSG_PEEK)) {
2416  			UNIXCB(skb).consumed += chunk;
2417  
2418  			sk_peek_offset_bwd(sk, chunk);
2419  
2420  			if (UNIXCB(skb).fp)
2421  				unix_detach_fds(&scm, skb);
2422  
2423  			if (unix_skb_len(skb))
2424  				break;
2425  
2426  			skb_unlink(skb, &sk->sk_receive_queue);
2427  			consume_skb(skb);
2428  
2429  			if (scm.fp)
2430  				break;
2431  		} else {
2432  			/* It is questionable, see note in unix_dgram_recvmsg.
2433  			 */
2434  			if (UNIXCB(skb).fp)
2435  				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2436  
2437  			sk_peek_offset_fwd(sk, chunk);
2438  
2439  			if (UNIXCB(skb).fp)
2440  				break;
2441  
2442  			skip = 0;
2443  			last = skb;
2444  			last_len = skb->len;
2445  			unix_state_lock(sk);
2446  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2447  			if (skb)
2448  				goto again;
2449  			unix_state_unlock(sk);
2450  			break;
2451  		}
2452  	} while (size);
2453  
2454  	mutex_unlock(&u->iolock);
2455  	if (state->msg)
2456  		scm_recv(sock, state->msg, &scm, flags);
2457  	else
2458  		scm_destroy(&scm);
2459  out:
2460  	return copied ? : err;
2461  }
2462  
2463  static int unix_stream_read_actor(struct sk_buff *skb,
2464  				  int skip, int chunk,
2465  				  struct unix_stream_read_state *state)
2466  {
2467  	int ret;
2468  
2469  	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2470  				    state->msg, chunk);
2471  	return ret ?: chunk;
2472  }
2473  
2474  static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2475  			       size_t size, int flags)
2476  {
2477  	struct unix_stream_read_state state = {
2478  		.recv_actor = unix_stream_read_actor,
2479  		.socket = sock,
2480  		.msg = msg,
2481  		.size = size,
2482  		.flags = flags
2483  	};
2484  
2485  	return unix_stream_read_generic(&state, true);
2486  }
2487  
2488  static int unix_stream_splice_actor(struct sk_buff *skb,
2489  				    int skip, int chunk,
2490  				    struct unix_stream_read_state *state)
2491  {
2492  	return skb_splice_bits(skb, state->socket->sk,
2493  			       UNIXCB(skb).consumed + skip,
2494  			       state->pipe, chunk, state->splice_flags);
2495  }
2496  
2497  static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2498  				       struct pipe_inode_info *pipe,
2499  				       size_t size, unsigned int flags)
2500  {
2501  	struct unix_stream_read_state state = {
2502  		.recv_actor = unix_stream_splice_actor,
2503  		.socket = sock,
2504  		.pipe = pipe,
2505  		.size = size,
2506  		.splice_flags = flags,
2507  	};
2508  
2509  	if (unlikely(*ppos))
2510  		return -ESPIPE;
2511  
2512  	if (sock->file->f_flags & O_NONBLOCK ||
2513  	    flags & SPLICE_F_NONBLOCK)
2514  		state.flags = MSG_DONTWAIT;
2515  
2516  	return unix_stream_read_generic(&state, false);
2517  }
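/* Illustrative userspace sketch (editor's addition, not part of this file):
 * moving queued stream data into a pipe without a userspace copy, which
 * lands in unix_stream_splice_actor() above.
 *
 *	int p[2];
 *	pipe(p);
 *	ssize_t n = splice(sockfd, NULL, p[1], NULL, 4096,
 *			   SPLICE_F_NONBLOCK);
 */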
2518  
2519  static int unix_shutdown(struct socket *sock, int mode)
2520  {
2521  	struct sock *sk = sock->sk;
2522  	struct sock *other;
2523  
2524  	if (mode < SHUT_RD || mode > SHUT_RDWR)
2525  		return -EINVAL;
2526  	/* This maps:
2527  	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2528  	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2529  	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2530  	 */
2531  	++mode;
2532  
2533  	unix_state_lock(sk);
2534  	sk->sk_shutdown |= mode;
2535  	other = unix_peer(sk);
2536  	if (other)
2537  		sock_hold(other);
2538  	unix_state_unlock(sk);
2539  	sk->sk_state_change(sk);
2540  
2541  	if (other &&
2542  		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2543  
2544  		int peer_mode = 0;
2545  
2546  		if (mode&RCV_SHUTDOWN)
2547  			peer_mode |= SEND_SHUTDOWN;
2548  		if (mode&SEND_SHUTDOWN)
2549  			peer_mode |= RCV_SHUTDOWN;
2550  		unix_state_lock(other);
2551  		other->sk_shutdown |= peer_mode;
2552  		unix_state_unlock(other);
2553  		other->sk_state_change(other);
2554  		if (peer_mode == SHUTDOWN_MASK)
2555  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2556  		else if (peer_mode & RCV_SHUTDOWN)
2557  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2558  	}
2559  	if (other)
2560  		sock_put(other);
2561  
2562  	return 0;
2563  }
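/* Illustrative userspace sketch (editor's addition, not part of this file):
 * after a half-close, the peer-shutdown propagation above makes the other
 * end's read() return 0 and its poll() report POLLIN | POLLRDHUP.
 *
 *	shutdown(fd, SHUT_WR);
 *	on the peer: read(peer_fd, buf, sizeof(buf)) == 0  (EOF)
 */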
2564  
2565  long unix_inq_len(struct sock *sk)
2566  {
2567  	struct sk_buff *skb;
2568  	long amount = 0;
2569  
2570  	if (sk->sk_state == TCP_LISTEN)
2571  		return -EINVAL;
2572  
2573  	spin_lock(&sk->sk_receive_queue.lock);
2574  	if (sk->sk_type == SOCK_STREAM ||
2575  	    sk->sk_type == SOCK_SEQPACKET) {
2576  		skb_queue_walk(&sk->sk_receive_queue, skb)
2577  			amount += unix_skb_len(skb);
2578  	} else {
2579  		skb = skb_peek(&sk->sk_receive_queue);
2580  		if (skb)
2581  			amount = skb->len;
2582  	}
2583  	spin_unlock(&sk->sk_receive_queue.lock);
2584  
2585  	return amount;
2586  }
2587  EXPORT_SYMBOL_GPL(unix_inq_len);
2588  
2589  long unix_outq_len(struct sock *sk)
2590  {
2591  	return sk_wmem_alloc_get(sk);
2592  }
2593  EXPORT_SYMBOL_GPL(unix_outq_len);
2594  
2595  static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2596  {
2597  	struct sock *sk = sock->sk;
2598  	long amount = 0;
2599  	int err;
2600  
2601  	switch (cmd) {
2602  	case SIOCOUTQ:
2603  		amount = unix_outq_len(sk);
2604  		err = put_user(amount, (int __user *)arg);
2605  		break;
2606  	case SIOCINQ:
2607  		amount = unix_inq_len(sk);
2608  		if (amount < 0)
2609  			err = amount;
2610  		else
2611  			err = put_user(amount, (int __user *)arg);
2612  		break;
2613  	default:
2614  		err = -ENOIOCTLCMD;
2615  		break;
2616  	}
2617  	return err;
2618  }
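/* Illustrative userspace sketch (editor's addition, not part of this file):
 * querying queue occupancy through the ioctls handled above.
 *
 *	int unread, unsent;
 *	ioctl(fd, SIOCINQ, &unread);	bytes queued for reading
 *	ioctl(fd, SIOCOUTQ, &unsent);	bytes not yet consumed by the peer
 */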
2619  
2620  static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2621  {
2622  	struct sock *sk = sock->sk;
2623  	unsigned int mask;
2624  
2625  	sock_poll_wait(file, sk_sleep(sk), wait);
2626  	mask = 0;
2627  
2628  	/* exceptional events? */
2629  	if (sk->sk_err)
2630  		mask |= POLLERR;
2631  	if (sk->sk_shutdown == SHUTDOWN_MASK)
2632  		mask |= POLLHUP;
2633  	if (sk->sk_shutdown & RCV_SHUTDOWN)
2634  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2635  
2636  	/* readable? */
2637  	if (!skb_queue_empty(&sk->sk_receive_queue))
2638  		mask |= POLLIN | POLLRDNORM;
2639  
2640  	/* Connection-based need to check for termination and startup */
2641  	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2642  	    sk->sk_state == TCP_CLOSE)
2643  		mask |= POLLHUP;
2644  
2645  	/*
2646  	 * we set writable also when the other side has shut down the
2647  	 * connection. This prevents stuck sockets.
2648  	 */
2649  	if (unix_writable(sk))
2650  		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2651  
2652  	return mask;
2653  }
2654  
2655  static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2656  				    poll_table *wait)
2657  {
2658  	struct sock *sk = sock->sk, *other;
2659  	unsigned int mask, writable;
2660  
2661  	sock_poll_wait(file, sk_sleep(sk), wait);
2662  	mask = 0;
2663  
2664  	/* exceptional events? */
2665  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2666  		mask |= POLLERR |
2667  			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2668  
2669  	if (sk->sk_shutdown & RCV_SHUTDOWN)
2670  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2671  	if (sk->sk_shutdown == SHUTDOWN_MASK)
2672  		mask |= POLLHUP;
2673  
2674  	/* readable? */
2675  	if (!skb_queue_empty(&sk->sk_receive_queue))
2676  		mask |= POLLIN | POLLRDNORM;
2677  
2678  	/* Connection-based need to check for termination and startup */
2679  	if (sk->sk_type == SOCK_SEQPACKET) {
2680  		if (sk->sk_state == TCP_CLOSE)
2681  			mask |= POLLHUP;
2682  		/* connection hasn't started yet? */
2683  		if (sk->sk_state == TCP_SYN_SENT)
2684  			return mask;
2685  	}
2686  
2687  	/* No write status requested, avoid expensive OUT tests. */
2688  	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2689  		return mask;
2690  
2691  	writable = unix_writable(sk);
2692  	if (writable) {
2693  		unix_state_lock(sk);
2694  
2695  		other = unix_peer(sk);
2696  		if (other && unix_peer(other) != sk &&
2697  		    unix_recvq_full(other) &&
2698  		    unix_dgram_peer_wake_me(sk, other))
2699  			writable = 0;
2700  
2701  		unix_state_unlock(sk);
2702  	}
2703  
2704  	if (writable)
2705  		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2706  	else
2707  		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2708  
2709  	return mask;
2710  }
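/* Editor's note (illustrative): for a connected datagram socket,
 * writability here depends on the *peer's* receive queue, so
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);
 *
 * blocks until the receiver drains its queue, mirroring the peer-wake
 * scheme used by unix_dgram_sendmsg() above.
 */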
2711  
2712  #ifdef CONFIG_PROC_FS
2713  
2714  #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2715  
2716  #define get_bucket(x) ((x) >> BUCKET_SPACE)
2717  #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2718  #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
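/* Worked example (editor's addition; assumes UNIX_HASH_BITS == 8 and a
 * 64-bit long): BUCKET_SPACE == 64 - 9 - 1 == 54, so a seq position packs
 * as pos = (bucket << 54) | offset, e.g.
 * set_bucket_offset(3, 2) == 0x00C0000000000002.
 */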
2719  
2720  static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2721  {
2722  	unsigned long offset = get_offset(*pos);
2723  	unsigned long bucket = get_bucket(*pos);
2724  	struct sock *sk;
2725  	unsigned long count = 0;
2726  
2727  	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2728  		if (sock_net(sk) != seq_file_net(seq))
2729  			continue;
2730  		if (++count == offset)
2731  			break;
2732  	}
2733  
2734  	return sk;
2735  }
2736  
2737  static struct sock *unix_next_socket(struct seq_file *seq,
2738  				     struct sock *sk,
2739  				     loff_t *pos)
2740  {
2741  	unsigned long bucket;
2742  
2743  	while (sk > (struct sock *)SEQ_START_TOKEN) {
2744  		sk = sk_next(sk);
2745  		if (!sk)
2746  			goto next_bucket;
2747  		if (sock_net(sk) == seq_file_net(seq))
2748  			return sk;
2749  	}
2750  
2751  	do {
2752  		sk = unix_from_bucket(seq, pos);
2753  		if (sk)
2754  			return sk;
2755  
2756  next_bucket:
2757  		bucket = get_bucket(*pos) + 1;
2758  		*pos = set_bucket_offset(bucket, 1);
2759  	} while (bucket < ARRAY_SIZE(unix_socket_table));
2760  
2761  	return NULL;
2762  }
2763  
2764  static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2765  	__acquires(unix_table_lock)
2766  {
2767  	spin_lock(&unix_table_lock);
2768  
2769  	if (!*pos)
2770  		return SEQ_START_TOKEN;
2771  
2772  	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2773  		return NULL;
2774  
2775  	return unix_next_socket(seq, NULL, pos);
2776  }
2777  
2778  static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2779  {
2780  	++*pos;
2781  	return unix_next_socket(seq, v, pos);
2782  }
2783  
2784  static void unix_seq_stop(struct seq_file *seq, void *v)
2785  	__releases(unix_table_lock)
2786  {
2787  	spin_unlock(&unix_table_lock);
2788  }
2789  
2790  static int unix_seq_show(struct seq_file *seq, void *v)
2791  {
2792  
2793  	if (v == SEQ_START_TOKEN)
2794  		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2795  			 "Inode Path\n");
2796  	else {
2797  		struct sock *s = v;
2798  		struct unix_sock *u = unix_sk(s);
2799  		unix_state_lock(s);
2800  
2801  		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2802  			s,
2803  			atomic_read(&s->sk_refcnt),
2804  			0,
2805  			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2806  			s->sk_type,
2807  			s->sk_socket ?
2808  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2809  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2810  			sock_i_ino(s));
2811  
2812  		if (u->addr) {
2813  			int i, len;
2814  			seq_putc(seq, ' ');
2815  
2816  			i = 0;
2817  			len = u->addr->len - sizeof(short);
2818  			if (!UNIX_ABSTRACT(s))
2819  				len--;
2820  			else {
2821  				seq_putc(seq, '@');
2822  				i++;
2823  			}
2824  			for ( ; i < len; i++)
2825  				seq_putc(seq, u->addr->name->sun_path[i] ?:
2826  					 '@');
2827  		}
2828  		unix_state_unlock(s);
2829  		seq_putc(seq, '\n');
2830  	}
2831  
2832  	return 0;
2833  }
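/* Illustrative sample of the resulting /proc/net/unix output (editor's
 * addition; values hypothetical).  A listening SOCK_STREAM socket shows
 * __SO_ACCEPTCON (0x00010000) in Flags and its bound path:
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff88003a4f5c00: 00000002 00000000 00010000 0001 01 16163 /run/demo.sock
 */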
2834  
2835  static const struct seq_operations unix_seq_ops = {
2836  	.start  = unix_seq_start,
2837  	.next   = unix_seq_next,
2838  	.stop   = unix_seq_stop,
2839  	.show   = unix_seq_show,
2840  };
2841  
2842  static int unix_seq_open(struct inode *inode, struct file *file)
2843  {
2844  	return seq_open_net(inode, file, &unix_seq_ops,
2845  			    sizeof(struct seq_net_private));
2846  }
2847  
2848  static const struct file_operations unix_seq_fops = {
2849  	.owner		= THIS_MODULE,
2850  	.open		= unix_seq_open,
2851  	.read		= seq_read,
2852  	.llseek		= seq_lseek,
2853  	.release	= seq_release_net,
2854  };
2855  
2856  #endif
2857  
2858  static const struct net_proto_family unix_family_ops = {
2859  	.family = PF_UNIX,
2860  	.create = unix_create,
2861  	.owner	= THIS_MODULE,
2862  };
2863  
2864  
2865  static int __net_init unix_net_init(struct net *net)
2866  {
2867  	int error = -ENOMEM;
2868  
2869  	net->unx.sysctl_max_dgram_qlen = 10;
2870  	if (unix_sysctl_register(net))
2871  		goto out;
2872  
2873  #ifdef CONFIG_PROC_FS
2874  	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2875  		unix_sysctl_unregister(net);
2876  		goto out;
2877  	}
2878  #endif
2879  	error = 0;
2880  out:
2881  	return error;
2882  }
2883  
2884  static void __net_exit unix_net_exit(struct net *net)
2885  {
2886  	unix_sysctl_unregister(net);
2887  	remove_proc_entry("unix", net->proc_net);
2888  }
2889  
2890  static struct pernet_operations unix_net_ops = {
2891  	.init = unix_net_init,
2892  	.exit = unix_net_exit,
2893  };
2894  
2895  static int __init af_unix_init(void)
2896  {
2897  	int rc = -1;
2898  
2899  	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2900  
2901  	rc = proto_register(&unix_proto, 1);
2902  	if (rc != 0) {
2903  		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2904  		goto out;
2905  	}
2906  
2907  	sock_register(&unix_family_ops);
2908  	register_pernet_subsys(&unix_net_ops);
2909  out:
2910  	return rc;
2911  }
2912  
2913  static void __exit af_unix_exit(void)
2914  {
2915  	sock_unregister(PF_UNIX);
2916  	proto_unregister(&unix_proto);
2917  	unregister_pernet_subsys(&unix_net_ops);
2918  }
2919  
2920  /* Earlier than device_initcall() so that other drivers invoking
2921     request_module() don't end up in a loop when modprobe tries
2922     to use a UNIX socket. But later than subsys_initcall() because
2923     we depend on infrastructure initialised there. */
2924  fs_initcall(af_unix_init);
2925  module_exit(af_unix_exit);
2926  
2927  MODULE_LICENSE("GPL");
2928  MODULE_ALIAS_NETPROTO(PF_UNIX);
2929