xref: /linux/net/unix/af_unix.c (revision d39d0ed196aa1685bb24771e92f78633c66ac9cb)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
 32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 33  *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
 35  *					if the max backlog of the listen socket
 36  *					has been reached. This won't break
37  *					old apps and it will avoid huge amount
38  *					of socks hashed (this for unix_gc()
 39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
/* Global hash table of all AF_UNIX sockets.  Slots 0..UNIX_HASH_SIZE-1 hold
 * bound sockets (chain chosen from the name/inode hash xor'ed with the
 * socket type); the one extra slot holds sockets not yet bound. */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table (see the SMP locking strategy comment below). */
static DEFINE_SPINLOCK(unix_table_lock);
/* Count of live AF_UNIX socks; limited in unix_create1(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Chain for sockets that have not been bound to an address yet. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/* A socket bound to a filesystem path stores the sentinel hash
 * UNIX_HASH_SIZE (see unix_bind()); anything else is an abstract bind. */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the LSM security id recorded in the scm cookie onto an skb
 * (attaches the sender's label to an outgoing message). */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Read the security id carried by a received skb back into the scm
 * cookie for delivery to the receiver. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* No-op stubs when security networking is compiled out. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate spin lock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
/* Drop one reference on a unix_address, freeing it on the last put. */
static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}
192 
/*
 *	Check unix socket name:
 *		- should be not zero length.
 *	        - if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	Returns the effective address length on success, -EINVAL on a bad
 *	family or out-of-range length.  Only the abstract-name path stores
 *	a folded hash in *hashp; filesystem names leave it untouched (their
 *	callers locate the target by inode instead).
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		/* Recompute the length from the now-guaranteed terminator. */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hash the raw bytes (the name may contain NULs). */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
222 
/* Unhash sk from its table chain.  Caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

/* Hash sk onto the given chain.  Caller holds unix_table_lock; sk must
 * not already be hashed. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
233 
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (dentry && dentry->d_inode == i) {
296 			sock_hold(s);
297 			goto found;
298 		}
299 	}
300 	s = NULL;
301 found:
302 	spin_unlock(&unix_table_lock);
303 	return s;
304 }
305 
306 static inline int unix_writable(struct sock *sk)
307 {
308 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
309 }
310 
/*
 * sk_write_space callback: wake writers blocked for POLLOUT once enough
 * queued write memory has been released to make the socket writable.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	/* sk->sk_wq is RCU-protected. */
	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync(&wq->wait);
		/* Also deliver SIGIO/async notification. */
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
324 
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332 		skb_queue_purge(&sk->sk_receive_queue);
333 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334 
335 		/* If one link of bidirectional dgram pipe is disconnected,
336 		 * we signal error. Messages are lost. Do not make this,
337 		 * when peer was not connected to us.
338 		 */
339 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340 			other->sk_err = ECONNRESET;
341 			other->sk_error_report(other);
342 		}
343 	}
344 }
345 
/*
 * sk_destruct callback, run when the last reference to sk is dropped.
 * Purges the receive queue, releases the bound address and fixes up the
 * global accounting.  The WARN_ONs check invariants that must hold by
 * the time the refcount reaches zero.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		/* Leak rather than free a socket that is somehow still live. */
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
	/* sock_prot_inuse_add() requires BHs disabled on this CPU. */
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
		atomic_read(&unix_nr_socks));
#endif
}
372 
/*
 * Tear down one unix socket: unhash it, mark it dead, notify the peer,
 * flush queued data and drop the final reference.  @embrion is non-zero
 * for a never-accepted socket being destroyed off a dying listener's
 * queue; such peers get ECONNRESET even with an empty receive queue.
 * Always returns 0.
 */
static int unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	     = u->dentry;
	u->dentry    = NULL;
	mnt	     = u->mnt;
	u->mnt	     = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an embryo connection) is lost:
			 * report ECONNRESET instead of a clean EOF. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryo sockets; release those too. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */

	return 0;
}
449 
450 static void init_peercred(struct sock *sk)
451 {
452 	put_pid(sk->sk_peer_pid);
453 	if (sk->sk_peer_cred)
454 		put_cred(sk->sk_peer_cred);
455 	sk->sk_peer_pid  = get_pid(task_tgid(current));
456 	sk->sk_peer_cred = get_current_cred();
457 }
458 
459 static void copy_peercred(struct sock *sk, struct sock *peersk)
460 {
461 	put_pid(sk->sk_peer_pid);
462 	if (sk->sk_peer_cred)
463 		put_cred(sk->sk_peer_cred);
464 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
465 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
466 }
467 
468 static int unix_listen(struct socket *sock, int backlog)
469 {
470 	int err;
471 	struct sock *sk = sock->sk;
472 	struct unix_sock *u = unix_sk(sk);
473 	struct pid *old_pid = NULL;
474 	const struct cred *old_cred = NULL;
475 
476 	err = -EOPNOTSUPP;
477 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
478 		goto out;	/* Only stream/seqpacket sockets accept */
479 	err = -EINVAL;
480 	if (!u->addr)
481 		goto out;	/* No listens on an unbound socket */
482 	unix_state_lock(sk);
483 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
484 		goto out_unlock;
485 	if (backlog > sk->sk_max_ack_backlog)
486 		wake_up_interruptible_all(&u->peer_wait);
487 	sk->sk_max_ack_backlog	= backlog;
488 	sk->sk_state		= TCP_LISTEN;
489 	/* set credentials so connect can copy them */
490 	init_peercred(sk);
491 	err = 0;
492 
493 out_unlock:
494 	unix_state_unlock(sk);
495 	put_pid(old_pid);
496 	if (old_cred)
497 		put_cred(old_cred);
498 out:
499 	return err;
500 }
501 
/* Forward declarations for the proto_ops tables below. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
526 
/* proto_ops for SOCK_STREAM unix sockets. */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
547 
/* proto_ops for SOCK_DGRAM unix sockets (no accept/listen). */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
568 
/* proto_ops for SOCK_SEQPACKET: connection-oriented like stream, but
 * datagram-style recv/poll semantics. */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
589 
/* Protocol descriptor: tells sk_alloc() how big a unix sock is. */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
603 
/*
 * Allocate and initialize one unix sock attached to @sock (which may be
 * NULL for an embryo created by connect).  Enforces a global limit of
 * 2 * get_max_files() sockets.  Returns the new sock, or NULL on
 * allocation failure or when the limit is exceeded.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_inc(&unix_nr_socks);
	/* NOTE(review): inc-then-test lets concurrent creators overshoot
	 * the limit by a few; the count is corrected on the failure path. */
	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	/* See the comment above af_unix_sk_receive_queue_lock_key. */
	lockdep_set_class(&sk->sk_receive_queue.lock,
				&af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	/* New sockets start out unbound. */
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
643 
/*
 * socket(2) handler for AF_UNIX: select the proto_ops table for the
 * requested type and allocate the underlying sock.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through: RAW is silently treated as DGRAM */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}
674 
675 static int unix_release(struct socket *sock)
676 {
677 	struct sock *sk = sock->sk;
678 
679 	if (!sk)
680 		return 0;
681 
682 	sock->sk = NULL;
683 
684 	return unix_release_sock(sk, 0);
685 }
686 
687 static int unix_autobind(struct socket *sock)
688 {
689 	struct sock *sk = sock->sk;
690 	struct net *net = sock_net(sk);
691 	struct unix_sock *u = unix_sk(sk);
692 	static u32 ordernum = 1;
693 	struct unix_address *addr;
694 	int err;
695 
696 	mutex_lock(&u->readlock);
697 
698 	err = 0;
699 	if (u->addr)
700 		goto out;
701 
702 	err = -ENOMEM;
703 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
704 	if (!addr)
705 		goto out;
706 
707 	addr->name->sun_family = AF_UNIX;
708 	atomic_set(&addr->refcnt, 1);
709 
710 retry:
711 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
712 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
713 
714 	spin_lock(&unix_table_lock);
715 	ordernum = (ordernum+1)&0xFFFFF;
716 
717 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
718 				      addr->hash)) {
719 		spin_unlock(&unix_table_lock);
720 		/* Sanity yield. It is unusual case, but yet... */
721 		if (!(ordernum&0xFF))
722 			yield();
723 		goto retry;
724 	}
725 	addr->hash ^= sk->sk_type;
726 
727 	__unix_remove_socket(sk);
728 	u->addr = addr;
729 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
730 	spin_unlock(&unix_table_lock);
731 	err = 0;
732 
733 out:	mutex_unlock(&u->readlock);
734 	return err;
735 }
736 
/*
 * Resolve a sockaddr_un to the socket it names.
 *
 * Filesystem addresses are resolved through the VFS: the path must be
 * writable by the caller and refer to a socket inode.  Abstract
 * addresses are looked up in the global hash table.  On success the
 * target socket is returned with a reference held; on failure a
 * negative errno is stored in *error and NULL is returned.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		/* Sending to a socket requires write permission on it. */
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		/* Name exists but is bound by a socket of another type. */
		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
791 
792 
/*
 * bind(2) for unix sockets.  A family-only address triggers autobind;
 * an abstract name ("\0...") is entered into the global hash table; a
 * filesystem name creates a socket inode via vfs_mknod() and records
 * its dentry/vfsmount on the unix sock.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct dentry *dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family only, no name: pick an abstract name automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* u->readlock serializes this bind against autobind. */
	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;	/* already bound */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;

		dentry = lookup_create(&nd, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&nd.path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(nd.path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
		/* Keep the new socket inode's dentry; drop the parent's. */
		dput(nd.path.dentry);
		nd.path.dentry = dentry;

		/* FS binds use the sentinel hash; lookup goes via the inode. */
		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract names must be unique per (net, type). */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/* FS sockets are hashed by inode number for byinode lookup. */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.path.dentry;
		u->mnt    = nd.path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	path_put(&nd.path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
915 
/*
 * Lock the state of two sockets in a stable, address-based order to
 * avoid ABBA deadlock between concurrent dgram connects.  sk2 may be
 * NULL or equal to sk1, in which case only sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

/* Counterpart of unix_state_double_lock(); unlock order is irrelevant. */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
940 
/*
 * connect(2) for SOCK_DGRAM unix sockets.  Connecting to AF_UNSPEC
 * dissolves an existing association; otherwise the named peer is looked
 * up, permission-checked, and installed as unix_peer(sk).  Reconnecting
 * purges datagrams queued from the previous peer.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* Credential-passing sockets must be bound first. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		/* Target is connected to somebody else: refuse. */
		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		/* Flush datagrams queued from the old peer, signal it. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1014 
/*
 * Sleep (for at most @timeo jiffies) until the peer's receive queue has
 * room again, the peer dies or shuts down, or a signal arrives.  Called
 * with other's state lock held; returns with it released.  Returns the
 * remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Re-check the sleep condition only after queueing on peer_wait,
	 * so a concurrent wakeup cannot be missed. */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1035 
/* connect() for SOCK_STREAM/SOCK_SEQPACKET sockets.
 *
 * A complete "server side" sock (newsk) plus a 1-byte handshake skb are
 * allocated up front, before any locks are taken.  On success the skb
 * (with skb->sk == newsk) is queued on the listener's receive queue;
 * unix_accept() later dequeues it and grafts newsk onto the accepting
 * socket.
 *
 * Returns 0 on success or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SO_PASSCRED requires an address; autobind if still unbound. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock (takes a reference on it). */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops other's state lock; we still hold a reference. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;	/* 'out' path drops the reference */
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	/* Nested: we hold other's lock; the TCP_LISTEN check above rules
	 * out lock-order inversion with a concurrent connect to us. */
	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	newsk->sk_wq		= &newu->peer_wq;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* queue the handshake skb on the listener; unix_accept() will
	 * retrieve newsk from skb->sk */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1210 
1211 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1212 {
1213 	struct sock *ska = socka->sk, *skb = sockb->sk;
1214 
1215 	/* Join our sockets back to back */
1216 	sock_hold(ska);
1217 	sock_hold(skb);
1218 	unix_peer(ska) = skb;
1219 	unix_peer(skb) = ska;
1220 	init_peercred(ska);
1221 	init_peercred(skb);
1222 
1223 	if (ska->sk_type != SOCK_DGRAM) {
1224 		ska->sk_state = TCP_ESTABLISHED;
1225 		skb->sk_state = TCP_ESTABLISHED;
1226 		socka->state  = SS_CONNECTED;
1227 		sockb->state  = SS_CONNECTED;
1228 	}
1229 	return 0;
1230 }
1231 
/* accept() for SOCK_STREAM/SOCK_SEQPACKET sockets.
 *
 * Connection requests sit on the listener's receive queue as skbs whose
 * skb->sk is the already fully initialised server-side sock (see
 * unix_stream_connect()).  Accepting is therefore just dequeueing one
 * skb and grafting its sock onto @newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* The backlog shrank; wake a connector blocked in unix_wait_for_peer(). */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1273 
1274 
/* getsockname()/getpeername() handler.
 *
 * When @peer is set, report the peer's address (-ENOTCONN if there is
 * none); otherwise report our own.  An unbound socket yields just the
 * sun_family field (length sizeof(short)).
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);	/* takes a reference */

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);	/* balance the sock_put() below */
	}

	u = unix_sk(sk);
	/* state lock protects u->addr against concurrent bind/autobind */
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}
1310 
1311 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1312 {
1313 	int i;
1314 
1315 	scm->fp = UNIXCB(skb).fp;
1316 	UNIXCB(skb).fp = NULL;
1317 
1318 	for (i = scm->fp->count-1; i >= 0; i--)
1319 		unix_notinflight(scm->fp->fp[i]);
1320 }
1321 
1322 static void unix_destruct_scm(struct sk_buff *skb)
1323 {
1324 	struct scm_cookie scm;
1325 	memset(&scm, 0, sizeof(scm));
1326 	scm.pid  = UNIXCB(skb).pid;
1327 	scm.cred = UNIXCB(skb).cred;
1328 	if (UNIXCB(skb).fp)
1329 		unix_detach_fds(&scm, skb);
1330 
1331 	/* Alas, it calls VFS */
1332 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1333 	scm_destroy(&scm);
1334 	sock_wfree(skb);
1335 }
1336 
1337 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1338 {
1339 	int i;
1340 
1341 	/*
1342 	 * Need to duplicate file references for the sake of garbage
1343 	 * collection.  Otherwise a socket in the fps might become a
1344 	 * candidate for GC while the skb is not yet queued.
1345 	 */
1346 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1347 	if (!UNIXCB(skb).fp)
1348 		return -ENOMEM;
1349 
1350 	for (i = scm->fp->count-1; i >= 0; i--)
1351 		unix_inflight(scm->fp->fp[i]);
1352 	return 0;
1353 }
1354 
1355 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1356 {
1357 	int err = 0;
1358 	UNIXCB(skb).pid  = get_pid(scm->pid);
1359 	UNIXCB(skb).cred = get_cred(scm->cred);
1360 	UNIXCB(skb).fp = NULL;
1361 	if (scm->fp && send_fds)
1362 		err = unix_attach_fds(scm, skb);
1363 
1364 	skb->destructor = unix_destruct_scm;
1365 	return err;
1366 }
1367 
1368 /*
1369  *	Send AF_UNIX data.
1370  */
1371 
/* sendmsg() for SOCK_DGRAM (also reused by SOCK_SEQPACKET via
 * unix_seqpacket_sendmsg()).
 *
 * The destination is either the explicit msg_name or the connected
 * peer.  The whole message is copied into one skb, then delivered to
 * the peer's receive queue under the peer's state lock, blocking (up
 * to the send timeout) while the queue is full.
 *
 * Returns the number of bytes sent or a negative errno.
 */
static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* throttle senders while the fd garbage collector is running */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* no address given: use the connected peer (referenced) */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	/* keep some slack below sk_sndbuf for skb overhead */
	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err)
		goto out_free;
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		/* If 'other' was our connected peer, detach it and fail
		 * with ECONNREFUSED; otherwise retry the name lookup. */
		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* Sockets connected back to us may always send (their queue is
	 * effectively ours); everyone else must wait for queue space. */
	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/* drops other's state lock; reference is kept */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
1522 
1523 
/* sendmsg() for SOCK_STREAM sockets.
 *
 * The payload is split into skbs sized to roughly half the send buffer
 * (capped at SKB_MAX_ALLOC) and appended to the connected peer's
 * receive queue one at a time.  Any passed file descriptors travel in
 * the first skb only.
 *
 * Returns the number of bytes sent (possibly short on error after some
 * progress) or a negative errno; raises SIGPIPE on a broken pipe when
 * nothing was sent and MSG_NOSIGNAL is not set.
 */
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sockaddr_un *sunaddr = msg->msg_name;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	/* throttle senders while the fd garbage collector is running */
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* explicit addresses are not allowed on stream sockets */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));


		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	/* report partial progress in preference to the error */
	return sent ? : err;
}
1640 
1641 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1642 				  struct msghdr *msg, size_t len)
1643 {
1644 	int err;
1645 	struct sock *sk = sock->sk;
1646 
1647 	err = sock_error(sk);
1648 	if (err)
1649 		return err;
1650 
1651 	if (sk->sk_state != TCP_ESTABLISHED)
1652 		return -ENOTCONN;
1653 
1654 	if (msg->msg_namelen)
1655 		msg->msg_namelen = 0;
1656 
1657 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1658 }
1659 
1660 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1661 {
1662 	struct unix_sock *u = unix_sk(sk);
1663 
1664 	msg->msg_namelen = 0;
1665 	if (u->addr) {
1666 		msg->msg_namelen = u->addr->len;
1667 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1668 	}
1669 }
1670 
/* recvmsg() for SOCK_DGRAM/SOCK_SEQPACKET sockets.
 *
 * Dequeues exactly one datagram (u->readlock serialises concurrent
 * readers), copies as much as fits (setting MSG_TRUNC otherwise) and
 * transfers any attached credentials and file descriptors to the
 * caller's scm cookie.  MSG_PEEK clones the fd list instead of
 * detaching it.
 *
 * Returns the number of bytes copied or a negative errno.
 */
static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	msg->msg_namelen = 0;

	mutex_lock(&u->readlock);

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* a queue slot opened up; wake blocked senders */
	wake_up_interruptible_sync(&u->peer_wait);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1753 
1754 /*
 *	Sleep until data has arrived.  But check for races.
1756  */
1757 
/* Wait (interruptibly, up to @timeo jiffies) until @sk has queued data,
 * an error, or a receive shutdown.  Called without the state lock; it is
 * taken here so the wakeup conditions are checked race-free, and released
 * around the actual sleep.  Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* stop waiting on data, error, shutdown, signal or timeout */
		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
1785 
1786 
1787 
/* recvmsg() for SOCK_STREAM sockets.
 *
 * Pulls skbs off the receive queue and glues their payloads together
 * until @size bytes are copied or the rcvlowat target is met, never
 * merging data from writers with different credentials.  u->readlock
 * keeps concurrent readers from interleaving the queue.  Delivery of a
 * file-descriptor list terminates the read early so fds stay associated
 * with the right data boundary.
 *
 * Returns bytes copied, 0 on EOF, or a negative errno.
 */
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	mutex_lock(&u->readlock);

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			/* drop readlock while sleeping so writers/readers
			 * are not blocked behind us */
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				goto out;
			}
			mutex_lock(&u->readlock);
			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
			    (UNIXCB(skb).cred != siocb->scm->cred)) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			skb_queue_head(&sk->sk_receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
				break;
			}

			consume_skb(skb);

			/* fds were delivered with this skb: stop here so
			 * they are not mixed with following data */
			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->sk_receive_queue, skb);
			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
1930 
1931 static int unix_shutdown(struct socket *sock, int mode)
1932 {
1933 	struct sock *sk = sock->sk;
1934 	struct sock *other;
1935 
1936 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1937 
1938 	if (mode) {
1939 		unix_state_lock(sk);
1940 		sk->sk_shutdown |= mode;
1941 		other = unix_peer(sk);
1942 		if (other)
1943 			sock_hold(other);
1944 		unix_state_unlock(sk);
1945 		sk->sk_state_change(sk);
1946 
1947 		if (other &&
1948 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1949 
1950 			int peer_mode = 0;
1951 
1952 			if (mode&RCV_SHUTDOWN)
1953 				peer_mode |= SEND_SHUTDOWN;
1954 			if (mode&SEND_SHUTDOWN)
1955 				peer_mode |= RCV_SHUTDOWN;
1956 			unix_state_lock(other);
1957 			other->sk_shutdown |= peer_mode;
1958 			unix_state_unlock(other);
1959 			other->sk_state_change(other);
1960 			if (peer_mode == SHUTDOWN_MASK)
1961 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1962 			else if (peer_mode & RCV_SHUTDOWN)
1963 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1964 		}
1965 		if (other)
1966 			sock_put(other);
1967 	}
1968 	return 0;
1969 }
1970 
/* ioctl handler: SIOCOUTQ reports bytes still charged to the send
 * buffer, SIOCINQ reports readable bytes (all queued data for stream
 * types, only the next message for datagrams).
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = sk_wmem_alloc_get(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			/* a listener's queue holds connection requests,
			 * not data */
			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			/* hold the queue lock so the walk sees a stable list */
			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
2012 
2013 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2014 {
2015 	struct sock *sk = sock->sk;
2016 	unsigned int mask;
2017 
2018 	sock_poll_wait(file, sk_sleep(sk), wait);
2019 	mask = 0;
2020 
2021 	/* exceptional events? */
2022 	if (sk->sk_err)
2023 		mask |= POLLERR;
2024 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2025 		mask |= POLLHUP;
2026 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2027 		mask |= POLLRDHUP;
2028 
2029 	/* readable? */
2030 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2031 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2032 		mask |= POLLIN | POLLRDNORM;
2033 
2034 	/* Connection-based need to check for termination and startup */
2035 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2036 	    sk->sk_state == TCP_CLOSE)
2037 		mask |= POLLHUP;
2038 
2039 	/*
2040 	 * we set writable also when the other side has shut down the
2041 	 * connection. This prevents stuck sockets.
2042 	 */
2043 	if (unix_writable(sk))
2044 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2045 
2046 	return mask;
2047 }
2048 
/* poll() for datagram (and connected seqpacket) sockets.
 *
 * Beyond the usual error/readable checks, writability also depends on
 * the connected peer's receive queue: if the peer is not connected back
 * to us, we additionally register on the peer's peer_wait queue and
 * report not-writable while its queue is full.
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	writable = unix_writable(sk);
	if (writable) {
		other = unix_peer_get(sk);	/* referenced */
		if (other) {
			/* peers connected back to us drain our own queue,
			 * so only one-way peers can block our writes */
			if (unix_peer(other) != sk) {
				sock_poll_wait(file, &unix_sk(other)->peer_wait,
					  wait);
				if (unix_recvq_full(other))
					writable = 0;
			}

			sock_put(other);
		}
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
2103 
2104 #ifdef CONFIG_PROC_FS
2105 static struct sock *first_unix_socket(int *i)
2106 {
2107 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2108 		if (!hlist_empty(&unix_socket_table[*i]))
2109 			return __sk_head(&unix_socket_table[*i]);
2110 	}
2111 	return NULL;
2112 }
2113 
2114 static struct sock *next_unix_socket(int *i, struct sock *s)
2115 {
2116 	struct sock *next = sk_next(s);
2117 	/* More in this chain? */
2118 	if (next)
2119 		return next;
2120 	/* Look for next non-empty chain. */
2121 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2122 		if (!hlist_empty(&unix_socket_table[*i]))
2123 			return __sk_head(&unix_socket_table[*i]);
2124 	}
2125 	return NULL;
2126 }
2127 
/* Private iterator state for /proc/net/unix: 'i' is the current slot in
 * unix_socket_table (see first_unix_socket()/next_unix_socket()). */
struct unix_iter_state {
	struct seq_net_private p;
	int i;
};
2132 
2133 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2134 {
2135 	struct unix_iter_state *iter = seq->private;
2136 	loff_t off = 0;
2137 	struct sock *s;
2138 
2139 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2140 		if (sock_net(s) != seq_file_net(seq))
2141 			continue;
2142 		if (off == pos)
2143 			return s;
2144 		++off;
2145 	}
2146 	return NULL;
2147 }
2148 
2149 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2150 	__acquires(unix_table_lock)
2151 {
2152 	spin_lock(&unix_table_lock);
2153 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2154 }
2155 
2156 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2157 {
2158 	struct unix_iter_state *iter = seq->private;
2159 	struct sock *sk = v;
2160 	++*pos;
2161 
2162 	if (v == SEQ_START_TOKEN)
2163 		sk = first_unix_socket(&iter->i);
2164 	else
2165 		sk = next_unix_socket(&iter->i, sk);
2166 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2167 		sk = next_unix_socket(&iter->i, sk);
2168 	return sk;
2169 }
2170 
/* seq_file stop: release the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2176 
/* seq_file show: print one /proc/net/unix line (or the header for the
 * start token): address, refcount, protocol (always 0), flags, type,
 * state, inode and, if bound, the path (abstract names prefixed '@').
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		/* lock out address changes while we read u->addr */
		unix_state_lock(s);

		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* name length excludes the sun_family field */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* drop trailing NUL of pathnames */
			else {
				/* abstract names start with a NUL byte;
				 * render it as '@' */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2220 
/* seq_file iterator callbacks backing /proc/net/unix */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2227 
/* open() for /proc/net/unix: per-namespace seq file with a
 * unix_iter_state as private data. */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2233 
/* file operations for /proc/net/unix */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2241 
2242 #endif
2243 
/* registration record for the PF_UNIX protocol family */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2249 
2250 
/* Per-network-namespace setup: default sysctl values, the sysctl table
 * itself and (when configured) the /proc/net/unix entry.  Returns 0 or
 * -ENOMEM; on proc failure the sysctl registration is rolled back.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	/* default cap on datagram receive queue length */
	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
2269 
/* Per-network-namespace teardown: undo unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2275 
/* per-namespace init/exit hooks for AF_UNIX */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2280 
2281 static int __init af_unix_init(void)
2282 {
2283 	int rc = -1;
2284 	struct sk_buff *dummy_skb;
2285 
2286 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2287 
2288 	rc = proto_register(&unix_proto, 1);
2289 	if (rc != 0) {
2290 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2291 		       __func__);
2292 		goto out;
2293 	}
2294 
2295 	sock_register(&unix_family_ops);
2296 	register_pernet_subsys(&unix_net_ops);
2297 out:
2298 	return rc;
2299 }
2300 
/* Module exit: tear down everything af_unix_init() registered. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2307 
/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2317