xref: /linux/net/unix/af_unix.c (revision 7a8fc9b248e77a4eab0613acf30a6811799786b3)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan.cox@linux.org>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid huge amount
38  *					of socks hashed (this for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 static DEFINE_SPINLOCK(unix_table_lock);
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121 
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security ID held in @scm into the slot UNIXSID(@skb) points at. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Read the security ID back from UNIXSID(@skb) into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate rwlock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
200 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len]=0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
220 	return len;
221 }
222 
/*
 * Unlink @sk from whichever hash bucket it is on.  Takes no lock itself;
 * unix_remove_socket() is the variant that takes unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
227 
/*
 * Link @sk onto bucket @list.  Warns if @sk is still hashed elsewhere.
 * Takes no lock itself; unix_insert_socket() is the variant that takes
 * unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if(dentry && dentry->d_inode == i)
299 		{
300 			sock_hold(s);
301 			goto found;
302 		}
303 	}
304 	s = NULL;
305 found:
306 	spin_unlock(&unix_table_lock);
307 	return s;
308 }
309 
310 static inline int unix_writable(struct sock *sk)
311 {
312 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
313 }
314 
315 static void unix_write_space(struct sock *sk)
316 {
317 	read_lock(&sk->sk_callback_lock);
318 	if (unix_writable(sk)) {
319 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
320 			wake_up_interruptible_sync(sk->sk_sleep);
321 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
322 	}
323 	read_unlock(&sk->sk_callback_lock);
324 }
325 
326 /* When dgram socket disconnects (or changes its peer), we clear its receive
327  * queue of packets arrived from previous peer. First, it allows to do
328  * flow control based only on wmem_alloc; second, sk connected to peer
329  * may receive messages only from that peer. */
330 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
331 {
332 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
333 		skb_queue_purge(&sk->sk_receive_queue);
334 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
335 
336 		/* If one link of bidirectional dgram pipe is disconnected,
337 		 * we signal error. Messages are lost. Do not make this,
338 		 * when peer was not connected to us.
339 		 */
340 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
341 			other->sk_err = ECONNRESET;
342 			other->sk_error_report(other);
343 		}
344 	}
345 }
346 
347 static void unix_sock_destructor(struct sock *sk)
348 {
349 	struct unix_sock *u = unix_sk(sk);
350 
351 	skb_queue_purge(&sk->sk_receive_queue);
352 
353 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
354 	WARN_ON(!sk_unhashed(sk));
355 	WARN_ON(sk->sk_socket);
356 	if (!sock_flag(sk, SOCK_DEAD)) {
357 		printk("Attempt to release alive unix socket: %p\n", sk);
358 		return;
359 	}
360 
361 	if (u->addr)
362 		unix_release_addr(u->addr);
363 
364 	atomic_dec(&unix_nr_socks);
365 #ifdef UNIX_REFCNT_DEBUG
366 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
367 #endif
368 }
369 
370 static int unix_release_sock (struct sock *sk, int embrion)
371 {
372 	struct unix_sock *u = unix_sk(sk);
373 	struct dentry *dentry;
374 	struct vfsmount *mnt;
375 	struct sock *skpair;
376 	struct sk_buff *skb;
377 	int state;
378 
379 	unix_remove_socket(sk);
380 
381 	/* Clear state */
382 	unix_state_lock(sk);
383 	sock_orphan(sk);
384 	sk->sk_shutdown = SHUTDOWN_MASK;
385 	dentry	     = u->dentry;
386 	u->dentry    = NULL;
387 	mnt	     = u->mnt;
388 	u->mnt	     = NULL;
389 	state = sk->sk_state;
390 	sk->sk_state = TCP_CLOSE;
391 	unix_state_unlock(sk);
392 
393 	wake_up_interruptible_all(&u->peer_wait);
394 
395 	skpair=unix_peer(sk);
396 
397 	if (skpair!=NULL) {
398 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
399 			unix_state_lock(skpair);
400 			/* No more writes */
401 			skpair->sk_shutdown = SHUTDOWN_MASK;
402 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
403 				skpair->sk_err = ECONNRESET;
404 			unix_state_unlock(skpair);
405 			skpair->sk_state_change(skpair);
406 			read_lock(&skpair->sk_callback_lock);
407 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
408 			read_unlock(&skpair->sk_callback_lock);
409 		}
410 		sock_put(skpair); /* It may now die */
411 		unix_peer(sk) = NULL;
412 	}
413 
414 	/* Try to flush out this socket. Throw out buffers at least */
415 
416 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
417 		if (state==TCP_LISTEN)
418 			unix_release_sock(skb->sk, 1);
419 		/* passed fds are erased in the kfree_skb hook	      */
420 		kfree_skb(skb);
421 	}
422 
423 	if (dentry) {
424 		dput(dentry);
425 		mntput(mnt);
426 	}
427 
428 	sock_put(sk);
429 
430 	/* ---- Socket is dead now and most probably destroyed ---- */
431 
432 	/*
433 	 * Fixme: BSD difference: In BSD all sockets connected to use get
434 	 *	  ECONNRESET and we die on the spot. In Linux we behave
435 	 *	  like files and pipes do and wait for the last
436 	 *	  dereference.
437 	 *
438 	 * Can't we simply set sock->err?
439 	 *
440 	 *	  What the above comment does talk about? --ANK(980817)
441 	 */
442 
443 	if (unix_tot_inflight)
444 		unix_gc();		/* Garbage collect fds */
445 
446 	return 0;
447 }
448 
449 static int unix_listen(struct socket *sock, int backlog)
450 {
451 	int err;
452 	struct sock *sk = sock->sk;
453 	struct unix_sock *u = unix_sk(sk);
454 
455 	err = -EOPNOTSUPP;
456 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
457 		goto out;			/* Only stream/seqpacket sockets accept */
458 	err = -EINVAL;
459 	if (!u->addr)
460 		goto out;			/* No listens on an unbound socket */
461 	unix_state_lock(sk);
462 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
463 		goto out_unlock;
464 	if (backlog > sk->sk_max_ack_backlog)
465 		wake_up_interruptible_all(&u->peer_wait);
466 	sk->sk_max_ack_backlog	= backlog;
467 	sk->sk_state		= TCP_LISTEN;
468 	/* set credentials so connect can copy them */
469 	sk->sk_peercred.pid	= task_tgid_vnr(current);
470 	sk->sk_peercred.uid	= current->euid;
471 	sk->sk_peercred.gid	= current->egid;
472 	err = 0;
473 
474 out_unlock:
475 	unix_state_unlock(sk);
476 out:
477 	return err;
478 }
479 
480 static int unix_release(struct socket *);
481 static int unix_bind(struct socket *, struct sockaddr *, int);
482 static int unix_stream_connect(struct socket *, struct sockaddr *,
483 			       int addr_len, int flags);
484 static int unix_socketpair(struct socket *, struct socket *);
485 static int unix_accept(struct socket *, struct socket *, int);
486 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
487 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
488 static unsigned int unix_dgram_poll(struct file *, struct socket *,
489 				    poll_table *);
490 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
491 static int unix_shutdown(struct socket *, int);
492 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
493 			       struct msghdr *, size_t);
494 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
495 			       struct msghdr *, size_t, int);
496 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
497 			      struct msghdr *, size_t);
498 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
499 			      struct msghdr *, size_t, int);
500 static int unix_dgram_connect(struct socket *, struct sockaddr *,
501 			      int, int);
502 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
503 				  struct msghdr *, size_t);
504 
505 static const struct proto_ops unix_stream_ops = {
506 	.family =	PF_UNIX,
507 	.owner =	THIS_MODULE,
508 	.release =	unix_release,
509 	.bind =		unix_bind,
510 	.connect =	unix_stream_connect,
511 	.socketpair =	unix_socketpair,
512 	.accept =	unix_accept,
513 	.getname =	unix_getname,
514 	.poll =		unix_poll,
515 	.ioctl =	unix_ioctl,
516 	.listen =	unix_listen,
517 	.shutdown =	unix_shutdown,
518 	.setsockopt =	sock_no_setsockopt,
519 	.getsockopt =	sock_no_getsockopt,
520 	.sendmsg =	unix_stream_sendmsg,
521 	.recvmsg =	unix_stream_recvmsg,
522 	.mmap =		sock_no_mmap,
523 	.sendpage =	sock_no_sendpage,
524 };
525 
526 static const struct proto_ops unix_dgram_ops = {
527 	.family =	PF_UNIX,
528 	.owner =	THIS_MODULE,
529 	.release =	unix_release,
530 	.bind =		unix_bind,
531 	.connect =	unix_dgram_connect,
532 	.socketpair =	unix_socketpair,
533 	.accept =	sock_no_accept,
534 	.getname =	unix_getname,
535 	.poll =		unix_dgram_poll,
536 	.ioctl =	unix_ioctl,
537 	.listen =	sock_no_listen,
538 	.shutdown =	unix_shutdown,
539 	.setsockopt =	sock_no_setsockopt,
540 	.getsockopt =	sock_no_getsockopt,
541 	.sendmsg =	unix_dgram_sendmsg,
542 	.recvmsg =	unix_dgram_recvmsg,
543 	.mmap =		sock_no_mmap,
544 	.sendpage =	sock_no_sendpage,
545 };
546 
547 static const struct proto_ops unix_seqpacket_ops = {
548 	.family =	PF_UNIX,
549 	.owner =	THIS_MODULE,
550 	.release =	unix_release,
551 	.bind =		unix_bind,
552 	.connect =	unix_stream_connect,
553 	.socketpair =	unix_socketpair,
554 	.accept =	unix_accept,
555 	.getname =	unix_getname,
556 	.poll =		unix_dgram_poll,
557 	.ioctl =	unix_ioctl,
558 	.listen =	unix_listen,
559 	.shutdown =	unix_shutdown,
560 	.setsockopt =	sock_no_setsockopt,
561 	.getsockopt =	sock_no_getsockopt,
562 	.sendmsg =	unix_seqpacket_sendmsg,
563 	.recvmsg =	unix_dgram_recvmsg,
564 	.mmap =		sock_no_mmap,
565 	.sendpage =	sock_no_sendpage,
566 };
567 
568 static struct proto unix_proto = {
569 	.name	  = "UNIX",
570 	.owner	  = THIS_MODULE,
571 	.obj_size = sizeof(struct unix_sock),
572 };
573 
574 /*
575  * AF_UNIX sockets do not interact with hardware, hence they
576  * dont trigger interrupts - so it's safe for them to have
577  * bh-unsafe locking for their sk_receive_queue.lock. Split off
578  * this special lock-class by reinitializing the spinlock key:
579  */
580 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
581 
582 static struct sock * unix_create1(struct net *net, struct socket *sock)
583 {
584 	struct sock *sk = NULL;
585 	struct unix_sock *u;
586 
587 	atomic_inc(&unix_nr_socks);
588 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
589 		goto out;
590 
591 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
592 	if (!sk)
593 		goto out;
594 
595 	sock_init_data(sock,sk);
596 	lockdep_set_class(&sk->sk_receive_queue.lock,
597 				&af_unix_sk_receive_queue_lock_key);
598 
599 	sk->sk_write_space	= unix_write_space;
600 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
601 	sk->sk_destruct		= unix_sock_destructor;
602 	u	  = unix_sk(sk);
603 	u->dentry = NULL;
604 	u->mnt	  = NULL;
605 	spin_lock_init(&u->lock);
606 	atomic_long_set(&u->inflight, 0);
607 	INIT_LIST_HEAD(&u->link);
608 	mutex_init(&u->readlock); /* single task reading lock */
609 	init_waitqueue_head(&u->peer_wait);
610 	unix_insert_socket(unix_sockets_unbound, sk);
611 out:
612 	if (sk == NULL)
613 		atomic_dec(&unix_nr_socks);
614 	return sk;
615 }
616 
617 static int unix_create(struct net *net, struct socket *sock, int protocol)
618 {
619 	if (protocol && protocol != PF_UNIX)
620 		return -EPROTONOSUPPORT;
621 
622 	sock->state = SS_UNCONNECTED;
623 
624 	switch (sock->type) {
625 	case SOCK_STREAM:
626 		sock->ops = &unix_stream_ops;
627 		break;
628 		/*
629 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
630 		 *	nothing uses it.
631 		 */
632 	case SOCK_RAW:
633 		sock->type=SOCK_DGRAM;
634 	case SOCK_DGRAM:
635 		sock->ops = &unix_dgram_ops;
636 		break;
637 	case SOCK_SEQPACKET:
638 		sock->ops = &unix_seqpacket_ops;
639 		break;
640 	default:
641 		return -ESOCKTNOSUPPORT;
642 	}
643 
644 	return unix_create1(net, sock) ? 0 : -ENOMEM;
645 }
646 
647 static int unix_release(struct socket *sock)
648 {
649 	struct sock *sk = sock->sk;
650 
651 	if (!sk)
652 		return 0;
653 
654 	sock->sk = NULL;
655 
656 	return unix_release_sock (sk, 0);
657 }
658 
659 static int unix_autobind(struct socket *sock)
660 {
661 	struct sock *sk = sock->sk;
662 	struct net *net = sock_net(sk);
663 	struct unix_sock *u = unix_sk(sk);
664 	static u32 ordernum = 1;
665 	struct unix_address * addr;
666 	int err;
667 
668 	mutex_lock(&u->readlock);
669 
670 	err = 0;
671 	if (u->addr)
672 		goto out;
673 
674 	err = -ENOMEM;
675 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
676 	if (!addr)
677 		goto out;
678 
679 	addr->name->sun_family = AF_UNIX;
680 	atomic_set(&addr->refcnt, 1);
681 
682 retry:
683 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
684 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
685 
686 	spin_lock(&unix_table_lock);
687 	ordernum = (ordernum+1)&0xFFFFF;
688 
689 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
690 				      addr->hash)) {
691 		spin_unlock(&unix_table_lock);
692 		/* Sanity yield. It is unusual case, but yet... */
693 		if (!(ordernum&0xFF))
694 			yield();
695 		goto retry;
696 	}
697 	addr->hash ^= sk->sk_type;
698 
699 	__unix_remove_socket(sk);
700 	u->addr = addr;
701 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
702 	spin_unlock(&unix_table_lock);
703 	err = 0;
704 
705 out:	mutex_unlock(&u->readlock);
706 	return err;
707 }
708 
/*
 * Look up the socket named by @sunname / @len in namespace @net.
 *
 * Abstract names (first path byte zero) are resolved through the name
 * hash.  Filesystem names are resolved through a path lookup that
 * requires write permission and a socket inode.
 *
 * Returns the sock with a reference held.  On failure returns NULL and
 * stores the error in *error: the path lookup or permission error,
 * -ECONNREFUSED when no such socket exists, or -EPROTOTYPE when a
 * filesystem socket exists but has a different type.  *error is not
 * written on success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *found;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		err = -ECONNREFUSED;
		found = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!found)
			goto fail;

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;
	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
		goto put_fail;
	found = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
	if (!found)
		goto put_fail;

	/* Only touch the access time when the lookup will succeed. */
	if (found->sk_type == type)
		touch_atime(nd.path.mnt, nd.path.dentry);

	path_put(&nd.path);

	if (found->sk_type != type) {
		err = -EPROTOTYPE;
		sock_put(found);
		goto fail;
	}
	return found;

put_fail:
	path_put(&nd.path);
fail:
	*error = err;
	return NULL;
}
761 
762 
763 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
764 {
765 	struct sock *sk = sock->sk;
766 	struct net *net = sock_net(sk);
767 	struct unix_sock *u = unix_sk(sk);
768 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
769 	struct dentry * dentry = NULL;
770 	struct nameidata nd;
771 	int err;
772 	unsigned hash;
773 	struct unix_address *addr;
774 	struct hlist_head *list;
775 
776 	err = -EINVAL;
777 	if (sunaddr->sun_family != AF_UNIX)
778 		goto out;
779 
780 	if (addr_len==sizeof(short)) {
781 		err = unix_autobind(sock);
782 		goto out;
783 	}
784 
785 	err = unix_mkname(sunaddr, addr_len, &hash);
786 	if (err < 0)
787 		goto out;
788 	addr_len = err;
789 
790 	mutex_lock(&u->readlock);
791 
792 	err = -EINVAL;
793 	if (u->addr)
794 		goto out_up;
795 
796 	err = -ENOMEM;
797 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
798 	if (!addr)
799 		goto out_up;
800 
801 	memcpy(addr->name, sunaddr, addr_len);
802 	addr->len = addr_len;
803 	addr->hash = hash ^ sk->sk_type;
804 	atomic_set(&addr->refcnt, 1);
805 
806 	if (sunaddr->sun_path[0]) {
807 		unsigned int mode;
808 		err = 0;
809 		/*
810 		 * Get the parent directory, calculate the hash for last
811 		 * component.
812 		 */
813 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
814 		if (err)
815 			goto out_mknod_parent;
816 
817 		dentry = lookup_create(&nd, 0);
818 		err = PTR_ERR(dentry);
819 		if (IS_ERR(dentry))
820 			goto out_mknod_unlock;
821 
822 		/*
823 		 * All right, let's create it.
824 		 */
825 		mode = S_IFSOCK |
826 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
827 		err = mnt_want_write(nd.path.mnt);
828 		if (err)
829 			goto out_mknod_dput;
830 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
831 		mnt_drop_write(nd.path.mnt);
832 		if (err)
833 			goto out_mknod_dput;
834 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
835 		dput(nd.path.dentry);
836 		nd.path.dentry = dentry;
837 
838 		addr->hash = UNIX_HASH_SIZE;
839 	}
840 
841 	spin_lock(&unix_table_lock);
842 
843 	if (!sunaddr->sun_path[0]) {
844 		err = -EADDRINUSE;
845 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
846 					      sk->sk_type, hash)) {
847 			unix_release_addr(addr);
848 			goto out_unlock;
849 		}
850 
851 		list = &unix_socket_table[addr->hash];
852 	} else {
853 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
854 		u->dentry = nd.path.dentry;
855 		u->mnt    = nd.path.mnt;
856 	}
857 
858 	err = 0;
859 	__unix_remove_socket(sk);
860 	u->addr = addr;
861 	__unix_insert_socket(list, sk);
862 
863 out_unlock:
864 	spin_unlock(&unix_table_lock);
865 out_up:
866 	mutex_unlock(&u->readlock);
867 out:
868 	return err;
869 
870 out_mknod_dput:
871 	dput(dentry);
872 out_mknod_unlock:
873 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
874 	path_put(&nd.path);
875 out_mknod_parent:
876 	if (err==-EEXIST)
877 		err=-EADDRINUSE;
878 	unix_release_addr(addr);
879 	goto out_up;
880 }
881 
/*
 * Take the state locks of @sk1 and @sk2.  When both are the same sock,
 * or @sk2 is NULL, only @sk1 is locked.  Otherwise the sock with the
 * lower address is locked first and the other one with the nested
 * variant, so two callers always agree on the order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
896 
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and
 * @sk2 as well when it is a distinct, non-NULL sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (unlikely(sk1 == sk2) || !sk2)
		return;

	unix_state_unlock(sk2);
}
906 
907 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
908 			      int alen, int flags)
909 {
910 	struct sock *sk = sock->sk;
911 	struct net *net = sock_net(sk);
912 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
913 	struct sock *other;
914 	unsigned hash;
915 	int err;
916 
917 	if (addr->sa_family != AF_UNSPEC) {
918 		err = unix_mkname(sunaddr, alen, &hash);
919 		if (err < 0)
920 			goto out;
921 		alen = err;
922 
923 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
924 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
925 			goto out;
926 
927 restart:
928 		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
929 		if (!other)
930 			goto out;
931 
932 		unix_state_double_lock(sk, other);
933 
934 		/* Apparently VFS overslept socket death. Retry. */
935 		if (sock_flag(other, SOCK_DEAD)) {
936 			unix_state_double_unlock(sk, other);
937 			sock_put(other);
938 			goto restart;
939 		}
940 
941 		err = -EPERM;
942 		if (!unix_may_send(sk, other))
943 			goto out_unlock;
944 
945 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
946 		if (err)
947 			goto out_unlock;
948 
949 	} else {
950 		/*
951 		 *	1003.1g breaking connected state with AF_UNSPEC
952 		 */
953 		other = NULL;
954 		unix_state_double_lock(sk, other);
955 	}
956 
957 	/*
958 	 * If it was connected, reconnect.
959 	 */
960 	if (unix_peer(sk)) {
961 		struct sock *old_peer = unix_peer(sk);
962 		unix_peer(sk)=other;
963 		unix_state_double_unlock(sk, other);
964 
965 		if (other != old_peer)
966 			unix_dgram_disconnected(sk, old_peer);
967 		sock_put(old_peer);
968 	} else {
969 		unix_peer(sk)=other;
970 		unix_state_double_unlock(sk, other);
971 	}
972 	return 0;
973 
974 out_unlock:
975 	unix_state_double_unlock(sk, other);
976 	sock_put(other);
977 out:
978 	return err;
979 }
980 
981 static long unix_wait_for_peer(struct sock *other, long timeo)
982 {
983 	struct unix_sock *u = unix_sk(other);
984 	int sched;
985 	DEFINE_WAIT(wait);
986 
987 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
988 
989 	sched = !sock_flag(other, SOCK_DEAD) &&
990 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
991 		unix_recvq_full(other);
992 
993 	unix_state_unlock(other);
994 
995 	if (sched)
996 		timeo = schedule_timeout(timeo);
997 
998 	finish_wait(&u->peer_wait, &wait);
999 	return timeo;
1000 }
1001 
1002 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1003 			       int addr_len, int flags)
1004 {
1005 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1006 	struct sock *sk = sock->sk;
1007 	struct net *net = sock_net(sk);
1008 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1009 	struct sock *newsk = NULL;
1010 	struct sock *other = NULL;
1011 	struct sk_buff *skb = NULL;
1012 	unsigned hash;
1013 	int st;
1014 	int err;
1015 	long timeo;
1016 
1017 	err = unix_mkname(sunaddr, addr_len, &hash);
1018 	if (err < 0)
1019 		goto out;
1020 	addr_len = err;
1021 
1022 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1023 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1024 		goto out;
1025 
1026 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1027 
1028 	/* First of all allocate resources.
1029 	   If we will make it after state is locked,
1030 	   we will have to recheck all again in any case.
1031 	 */
1032 
1033 	err = -ENOMEM;
1034 
1035 	/* create new sock for complete connection */
1036 	newsk = unix_create1(sock_net(sk), NULL);
1037 	if (newsk == NULL)
1038 		goto out;
1039 
1040 	/* Allocate skb for sending to listening sock */
1041 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1042 	if (skb == NULL)
1043 		goto out;
1044 
1045 restart:
1046 	/*  Find listening sock. */
1047 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1048 	if (!other)
1049 		goto out;
1050 
1051 	/* Latch state of peer */
1052 	unix_state_lock(other);
1053 
1054 	/* Apparently VFS overslept socket death. Retry. */
1055 	if (sock_flag(other, SOCK_DEAD)) {
1056 		unix_state_unlock(other);
1057 		sock_put(other);
1058 		goto restart;
1059 	}
1060 
1061 	err = -ECONNREFUSED;
1062 	if (other->sk_state != TCP_LISTEN)
1063 		goto out_unlock;
1064 
1065 	if (unix_recvq_full(other)) {
1066 		err = -EAGAIN;
1067 		if (!timeo)
1068 			goto out_unlock;
1069 
1070 		timeo = unix_wait_for_peer(other, timeo);
1071 
1072 		err = sock_intr_errno(timeo);
1073 		if (signal_pending(current))
1074 			goto out;
1075 		sock_put(other);
1076 		goto restart;
1077 	}
1078 
1079 	/* Latch our state.
1080 
1081 	   It is tricky place. We need to grab write lock and cannot
1082 	   drop lock on peer. It is dangerous because deadlock is
1083 	   possible. Connect to self case and simultaneous
1084 	   attempt to connect are eliminated by checking socket
1085 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1086 	   check this before attempt to grab lock.
1087 
1088 	   Well, and we have to recheck the state after socket locked.
1089 	 */
1090 	st = sk->sk_state;
1091 
1092 	switch (st) {
1093 	case TCP_CLOSE:
1094 		/* This is ok... continue with connect */
1095 		break;
1096 	case TCP_ESTABLISHED:
1097 		/* Socket is already connected */
1098 		err = -EISCONN;
1099 		goto out_unlock;
1100 	default:
1101 		err = -EINVAL;
1102 		goto out_unlock;
1103 	}
1104 
1105 	unix_state_lock_nested(sk);
1106 
1107 	if (sk->sk_state != st) {
1108 		unix_state_unlock(sk);
1109 		unix_state_unlock(other);
1110 		sock_put(other);
1111 		goto restart;
1112 	}
1113 
1114 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1115 	if (err) {
1116 		unix_state_unlock(sk);
1117 		goto out_unlock;
1118 	}
1119 
1120 	/* The way is open! Fastly set all the necessary fields... */
1121 
1122 	sock_hold(sk);
1123 	unix_peer(newsk)	= sk;
1124 	newsk->sk_state		= TCP_ESTABLISHED;
1125 	newsk->sk_type		= sk->sk_type;
1126 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1127 	newsk->sk_peercred.uid	= current->euid;
1128 	newsk->sk_peercred.gid	= current->egid;
1129 	newu = unix_sk(newsk);
1130 	newsk->sk_sleep		= &newu->peer_wait;
1131 	otheru = unix_sk(other);
1132 
1133 	/* copy address information from listening to new sock*/
1134 	if (otheru->addr) {
1135 		atomic_inc(&otheru->addr->refcnt);
1136 		newu->addr = otheru->addr;
1137 	}
1138 	if (otheru->dentry) {
1139 		newu->dentry	= dget(otheru->dentry);
1140 		newu->mnt	= mntget(otheru->mnt);
1141 	}
1142 
1143 	/* Set credentials */
1144 	sk->sk_peercred = other->sk_peercred;
1145 
1146 	sock->state	= SS_CONNECTED;
1147 	sk->sk_state	= TCP_ESTABLISHED;
1148 	sock_hold(newsk);
1149 
1150 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1151 	unix_peer(sk)	= newsk;
1152 
1153 	unix_state_unlock(sk);
1154 
1155 	/* take ten and and send info to listening sock */
1156 	spin_lock(&other->sk_receive_queue.lock);
1157 	__skb_queue_tail(&other->sk_receive_queue, skb);
1158 	spin_unlock(&other->sk_receive_queue.lock);
1159 	unix_state_unlock(other);
1160 	other->sk_data_ready(other, 0);
1161 	sock_put(other);
1162 	return 0;
1163 
1164 out_unlock:
1165 	if (other)
1166 		unix_state_unlock(other);
1167 
1168 out:
1169 	if (skb)
1170 		kfree_skb(skb);
1171 	if (newsk)
1172 		unix_release_sock(newsk, 0);
1173 	if (other)
1174 		sock_put(other);
1175 	return err;
1176 }
1177 
1178 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1179 {
1180 	struct sock *ska=socka->sk, *skb = sockb->sk;
1181 
1182 	/* Join our sockets back to back */
1183 	sock_hold(ska);
1184 	sock_hold(skb);
1185 	unix_peer(ska)=skb;
1186 	unix_peer(skb)=ska;
1187 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1188 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1189 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1190 
1191 	if (ska->sk_type != SOCK_DGRAM) {
1192 		ska->sk_state = TCP_ESTABLISHED;
1193 		skb->sk_state = TCP_ESTABLISHED;
1194 		socka->state  = SS_CONNECTED;
1195 		sockb->state  = SS_CONNECTED;
1196 	}
1197 	return 0;
1198 }
1199 
1200 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1201 {
1202 	struct sock *sk = sock->sk;
1203 	struct sock *tsk;
1204 	struct sk_buff *skb;
1205 	int err;
1206 
1207 	err = -EOPNOTSUPP;
1208 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1209 		goto out;
1210 
1211 	err = -EINVAL;
1212 	if (sk->sk_state != TCP_LISTEN)
1213 		goto out;
1214 
1215 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1216 	 * so that no locks are necessary.
1217 	 */
1218 
1219 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1220 	if (!skb) {
1221 		/* This means receive shutdown. */
1222 		if (err == 0)
1223 			err = -EINVAL;
1224 		goto out;
1225 	}
1226 
1227 	tsk = skb->sk;
1228 	skb_free_datagram(sk, skb);
1229 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1230 
1231 	/* attach accepted sock to socket */
1232 	unix_state_lock(tsk);
1233 	newsock->state = SS_CONNECTED;
1234 	sock_graft(tsk, newsock);
1235 	unix_state_unlock(tsk);
1236 	return 0;
1237 
1238 out:
1239 	return err;
1240 }
1241 
1242 
1243 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1244 {
1245 	struct sock *sk = sock->sk;
1246 	struct unix_sock *u;
1247 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1248 	int err = 0;
1249 
1250 	if (peer) {
1251 		sk = unix_peer_get(sk);
1252 
1253 		err = -ENOTCONN;
1254 		if (!sk)
1255 			goto out;
1256 		err = 0;
1257 	} else {
1258 		sock_hold(sk);
1259 	}
1260 
1261 	u = unix_sk(sk);
1262 	unix_state_lock(sk);
1263 	if (!u->addr) {
1264 		sunaddr->sun_family = AF_UNIX;
1265 		sunaddr->sun_path[0] = 0;
1266 		*uaddr_len = sizeof(short);
1267 	} else {
1268 		struct unix_address *addr = u->addr;
1269 
1270 		*uaddr_len = addr->len;
1271 		memcpy(sunaddr, addr->name, *uaddr_len);
1272 	}
1273 	unix_state_unlock(sk);
1274 	sock_put(sk);
1275 out:
1276 	return err;
1277 }
1278 
1279 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1280 {
1281 	int i;
1282 
1283 	scm->fp = UNIXCB(skb).fp;
1284 	skb->destructor = sock_wfree;
1285 	UNIXCB(skb).fp = NULL;
1286 
1287 	for (i=scm->fp->count-1; i>=0; i--)
1288 		unix_notinflight(scm->fp->fp[i]);
1289 }
1290 
1291 static void unix_destruct_fds(struct sk_buff *skb)
1292 {
1293 	struct scm_cookie scm;
1294 	memset(&scm, 0, sizeof(scm));
1295 	unix_detach_fds(&scm, skb);
1296 
1297 	/* Alas, it calls VFS */
1298 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1299 	scm_destroy(&scm);
1300 	sock_wfree(skb);
1301 }
1302 
1303 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1304 {
1305 	int i;
1306 	for (i=scm->fp->count-1; i>=0; i--)
1307 		unix_inflight(scm->fp->fp[i]);
1308 	UNIXCB(skb).fp = scm->fp;
1309 	skb->destructor = unix_destruct_fds;
1310 	scm->fp = NULL;
1311 }
1312 
1313 /*
1314  *	Send AF_UNIX data.
1315  */
1316 
/*
 * Send one datagram (also used by unix_seqpacket_sendmsg()).
 *
 * The receiver is looked up from msg->msg_name when msg->msg_namelen is
 * non-zero, otherwise the connected peer is used. The payload is copied
 * into a single skb which is appended to the receiver's sk_receive_queue.
 *
 * Returns len on success, or a negative errno:
 *   -EOPNOTSUPP   MSG_OOB was requested
 *   -ENOTCONN     no address given and no peer
 *   -EMSGSIZE     len > sk->sk_sndbuf - 32
 *   -ECONNRESET   receiver went away and there is no address to retry with
 *   -EPERM        unix_may_send() refused
 *   -ECONNREFUSED the connected peer was found dead (peer link is dropped)
 *   -EPIPE        receiver has RCV_SHUTDOWN set
 *   -EAGAIN       receiver queue full and the send timeout is zero
 * plus whatever scm_send(), unix_mkname(), unix_autobind(),
 * sock_alloc_send_skb(), memcpy_fromiovec(), unix_find_other(),
 * security_unix_may_send() or sock_intr_errno() report.
 */
1317 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1318 			      struct msghdr *msg, size_t len)
1319 {
1320 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1321 	struct sock *sk = sock->sk;
1322 	struct net *net = sock_net(sk);
1323 	struct unix_sock *u = unix_sk(sk);
1324 	struct sockaddr_un *sunaddr=msg->msg_name;
1325 	struct sock *other = NULL;
1326 	int namelen = 0; /* fake GCC */
1327 	int err;
1328 	unsigned hash;
1329 	struct sk_buff *skb;
1330 	long timeo;
1331 	struct scm_cookie tmp_scm;
1332 
	/* Use an on-stack cookie when the caller did not supply one. */
1333 	if (NULL == siocb->scm)
1334 		siocb->scm = &tmp_scm;
1335 	err = scm_send(sock, msg, siocb->scm);
1336 	if (err < 0)
1337 		return err;
1338 
1339 	err = -EOPNOTSUPP;
1340 	if (msg->msg_flags&MSG_OOB)
1341 		goto out;
1342 
	/*
	 * Explicit destination: unix_mkname() returns the address length on
	 * success and fills in hash. Otherwise fall back to the connected
	 * peer; unix_peer_get() hands back a reference that is dropped at
	 * the end of the function.
	 */
1343 	if (msg->msg_namelen) {
1344 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1345 		if (err < 0)
1346 			goto out;
1347 		namelen = err;
1348 	} else {
1349 		sunaddr = NULL;
1350 		err = -ENOTCONN;
1351 		other = unix_peer_get(sk);
1352 		if (!other)
1353 			goto out;
1354 	}
1355 
	/* SOCK_PASSCRED set but no address bound yet: autobind first. */
1356 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1357 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1358 		goto out;
1359 
1360 	err = -EMSGSIZE;
1361 	if (len > sk->sk_sndbuf - 32)
1362 		goto out;
1363 
1364 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1365 	if (skb==NULL)
1366 		goto out;
1367 
	/* Stamp sender credentials, passed files and security data on the skb. */
1368 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1369 	if (siocb->scm->fp)
1370 		unix_attach_fds(siocb->scm, skb);
1371 	unix_get_secdata(siocb->scm, skb);
1372 
1373 	skb_reset_transport_header(skb);
1374 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1375 	if (err)
1376 		goto out_free;
1377 
1378 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1379 
	/*
	 * Re-entered after waiting for queue space, or with other == NULL
	 * after a dead receiver was dropped below.
	 */
1380 restart:
1381 	if (!other) {
1382 		err = -ECONNRESET;
1383 		if (sunaddr == NULL)
1384 			goto out_free;
1385 
1386 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1387 					hash, &err);
1388 		if (other==NULL)
1389 			goto out_free;
1390 	}
1391 
1392 	unix_state_lock(other);
1393 	err = -EPERM;
1394 	if (!unix_may_send(sk, other))
1395 		goto out_unlock;
1396 
1397 	if (sock_flag(other, SOCK_DEAD)) {
1398 		/*
1399 		 *	Check with 1003.1g - what should
1400 		 *	datagram error
1401 		 */
1402 		unix_state_unlock(other);
1403 		sock_put(other);
1404 
		/*
		 * If the dead sock is still our recorded peer, sever the link
		 * (dropping the peer reference as well) and fail with
		 * -ECONNREFUSED; otherwise clear other and retry the lookup.
		 */
1405 		err = 0;
1406 		unix_state_lock(sk);
1407 		if (unix_peer(sk) == other) {
1408 			unix_peer(sk)=NULL;
1409 			unix_state_unlock(sk);
1410 
1411 			unix_dgram_disconnected(sk, other);
1412 			sock_put(other);
1413 			err = -ECONNREFUSED;
1414 		} else {
1415 			unix_state_unlock(sk);
1416 		}
1417 
1418 		other = NULL;
1419 		if (err)
1420 			goto out_free;
1421 		goto restart;
1422 	}
1423 
1424 	err = -EPIPE;
1425 	if (other->sk_shutdown & RCV_SHUTDOWN)
1426 		goto out_unlock;
1427 
1428 	if (sk->sk_type != SOCK_SEQPACKET) {
1429 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1430 		if (err)
1431 			goto out_unlock;
1432 	}
1433 
	/* The queue limit is not applied when the receiver's peer is us. */
1434 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1435 		if (!timeo) {
1436 			err = -EAGAIN;
1437 			goto out_unlock;
1438 		}
1439 
		/*
		 * NOTE(review): neither path after this call unlocks other, so
		 * unix_wait_for_peer() presumably returns with other's state
		 * lock released - confirm against its definition.
		 */
1440 		timeo = unix_wait_for_peer(other, timeo);
1441 
1442 		err = sock_intr_errno(timeo);
1443 		if (signal_pending(current))
1444 			goto out_free;
1445 
1446 		goto restart;
1447 	}
1448 
	/* Success: the queue owns the skb from here on. */
1449 	skb_queue_tail(&other->sk_receive_queue, skb);
1450 	unix_state_unlock(other);
1451 	other->sk_data_ready(other, len);
1452 	sock_put(other);
1453 	scm_destroy(siocb->scm);
1454 	return len;
1455 
	/* Error unwinding: each label releases one more resource. */
1456 out_unlock:
1457 	unix_state_unlock(other);
1458 out_free:
1459 	kfree_skb(skb);
1460 out:
1461 	if (other)
1462 		sock_put(other);
1463 	scm_destroy(siocb->scm);
1464 	return err;
1465 }
1466 
1467 
/*
 * Send data on a connected stream socket.
 *
 * The payload is cut into skbs of at most (sk_sndbuf / 2) - 64 bytes (and
 * never more than SKB_MAX_ALLOC), each of which is queued on the peer's
 * sk_receive_queue.
 *
 * Returns the number of bytes queued if any were, otherwise a negative
 * errno: -EOPNOTSUPP for MSG_OOB, -EISCONN / -EOPNOTSUPP when a destination
 * address is supplied, -ENOTCONN without a peer, -EPIPE (with SIGPIPE
 * unless MSG_NOSIGNAL is set or data was already sent) when either side is
 * shut down or the peer is dead, or the error from scm_send(),
 * sock_alloc_send_skb() or memcpy_fromiovec().
 *
 * siocb->scm is destroyed and reset to NULL on every exit path except the
 * early return after a failed scm_send().
 */
1468 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1469 			       struct msghdr *msg, size_t len)
1470 {
1471 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1472 	struct sock *sk = sock->sk;
1473 	struct sock *other = NULL;
1474 	struct sockaddr_un *sunaddr=msg->msg_name;
1475 	int err,size;
1476 	struct sk_buff *skb;
1477 	int sent=0;
1478 	struct scm_cookie tmp_scm;
1479 
	/* Use an on-stack cookie when the caller did not supply one. */
1480 	if (NULL == siocb->scm)
1481 		siocb->scm = &tmp_scm;
1482 	err = scm_send(sock, msg, siocb->scm);
1483 	if (err < 0)
1484 		return err;
1485 
1486 	err = -EOPNOTSUPP;
1487 	if (msg->msg_flags&MSG_OOB)
1488 		goto out_err;
1489 
	/*
	 * A destination address is never accepted here. Note that the peer
	 * pointer is read with unix_peer() and no reference is taken on it
	 * in this function.
	 */
1490 	if (msg->msg_namelen) {
1491 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1492 		goto out_err;
1493 	} else {
1494 		sunaddr = NULL;
1495 		err = -ENOTCONN;
1496 		other = unix_peer(sk);
1497 		if (!other)
1498 			goto out_err;
1499 	}
1500 
1501 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1502 		goto pipe_err;
1503 
1504 	while(sent < len)
1505 	{
1506 		/*
1507 		 *	Optimisation for the fact that under 0.01% of X
1508 		 *	messages typically need breaking up.
1509 		 */
1510 
1511 		size = len-sent;
1512 
1513 		/* Keep two messages in the pipe so it schedules better */
1514 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1515 			size = (sk->sk_sndbuf >> 1) - 64;
1516 
1517 		if (size > SKB_MAX_ALLOC)
1518 			size = SKB_MAX_ALLOC;
1519 
1520 		/*
1521 		 *	Grab a buffer
1522 		 */
1523 
1524 		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1525 
1526 		if (skb==NULL)
1527 			goto out_err;
1528 
1529 		/*
1530 		 *	If you pass two values to the sock_alloc_send_skb
1531 		 *	it tries to grab the large buffer with GFP_NOFS
1532 		 *	(which can fail easily), and if it fails grab the
1533 		 *	fallback size buffer which is under a page and will
1534 		 *	succeed. [Alan]
1535 		 */
		/* Never copy more than the skb actually has room for. */
1536 		size = min_t(int, size, skb_tailroom(skb));
1537 
		/*
		 * unix_attach_fds() clears scm->fp, so passed files travel
		 * with the first skb of the message only.
		 */
1538 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1539 		if (siocb->scm->fp)
1540 			unix_attach_fds(siocb->scm, skb);
1541 
1542 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1543 			kfree_skb(skb);
1544 			goto out_err;
1545 		}
1546 
1547 		unix_state_lock(other);
1548 
1549 		if (sock_flag(other, SOCK_DEAD) ||
1550 		    (other->sk_shutdown & RCV_SHUTDOWN))
1551 			goto pipe_err_free;
1552 
1553 		skb_queue_tail(&other->sk_receive_queue, skb);
1554 		unix_state_unlock(other);
1555 		other->sk_data_ready(other, size);
1556 		sent+=size;
1557 	}
1558 
1559 	scm_destroy(siocb->scm);
1560 	siocb->scm = NULL;
1561 
1562 	return sent;
1563 
	/* Reached with other's state lock held and an unqueued skb in hand. */
1564 pipe_err_free:
1565 	unix_state_unlock(other);
1566 	kfree_skb(skb);
1567 pipe_err:
1568 	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1569 		send_sig(SIGPIPE,current,0);
1570 	err = -EPIPE;
1571 out_err:
1572 	scm_destroy(siocb->scm);
1573 	siocb->scm = NULL;
	/* A partial send is reported as success; err only if nothing went out. */
1574 	return sent ? : err;
1575 }
1576 
1577 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1578 				  struct msghdr *msg, size_t len)
1579 {
1580 	int err;
1581 	struct sock *sk = sock->sk;
1582 
1583 	err = sock_error(sk);
1584 	if (err)
1585 		return err;
1586 
1587 	if (sk->sk_state != TCP_ESTABLISHED)
1588 		return -ENOTCONN;
1589 
1590 	if (msg->msg_namelen)
1591 		msg->msg_namelen = 0;
1592 
1593 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1594 }
1595 
1596 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1597 {
1598 	struct unix_sock *u = unix_sk(sk);
1599 
1600 	msg->msg_namelen = 0;
1601 	if (u->addr) {
1602 		msg->msg_namelen = u->addr->len;
1603 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1604 	}
1605 }
1606 
/*
 * Receive one datagram.
 *
 * Takes a single skb via skb_recv_datagram() while holding u->readlock,
 * copies up to size bytes into msg->msg_iov (setting MSG_TRUNC when the
 * datagram is longer), fills in the sender address if msg->msg_name is set
 * and passes credentials / security data / files on through the scm cookie.
 *
 * Returns the number of bytes copied, 0 for a shut-down SEQPACKET socket
 * whose receive attempt reported -EAGAIN, -EOPNOTSUPP for MSG_OOB, or the
 * error from skb_recv_datagram() / skb_copy_datagram_iovec().
 */
1607 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1608 			      struct msghdr *msg, size_t size,
1609 			      int flags)
1610 {
1611 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1612 	struct scm_cookie tmp_scm;
1613 	struct sock *sk = sock->sk;
1614 	struct unix_sock *u = unix_sk(sk);
1615 	int noblock = flags & MSG_DONTWAIT;
1616 	struct sk_buff *skb;
1617 	int err;
1618 
1619 	err = -EOPNOTSUPP;
1620 	if (flags&MSG_OOB)
1621 		goto out;
1622 
1623 	msg->msg_namelen = 0;
1624 
1625 	mutex_lock(&u->readlock);
1626 
1627 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1628 	if (!skb) {
1629 		unix_state_lock(sk);
1630 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1631 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1632 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1633 			err = 0;
1634 		unix_state_unlock(sk);
1635 		goto out_unlock;
1636 	}
1637 
	/* A queue slot is in use no longer: wake tasks sleeping on peer_wait. */
1638 	wake_up_interruptible_sync(&u->peer_wait);
1639 
1640 	if (msg->msg_name)
1641 		unix_copy_addr(msg, skb->sk);
1642 
	/* Clamp to the datagram length; flag truncation if the buffer is short. */
1643 	if (size > skb->len)
1644 		size = skb->len;
1645 	else if (size < skb->len)
1646 		msg->msg_flags |= MSG_TRUNC;
1647 
1648 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1649 	if (err)
1650 		goto out_free;
1651 
	/* Use a zeroed on-stack cookie when the caller did not supply one. */
1652 	if (!siocb->scm) {
1653 		siocb->scm = &tmp_scm;
1654 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1655 	}
1656 	siocb->scm->creds = *UNIXCREDS(skb);
1657 	unix_set_secdata(siocb->scm, skb);
1658 
1659 	if (!(flags & MSG_PEEK))
1660 	{
		/* Real read: the files move from the skb into the cookie. */
1661 		if (UNIXCB(skb).fp)
1662 			unix_detach_fds(siocb->scm, skb);
1663 	}
1664 	else
1665 	{
1666 		/* It is questionable: on PEEK we could:
1667 		   - do not return fds - good, but too simple 8)
1668 		   - return fds, and do not return them on read (old strategy,
1669 		     apparently wrong)
1670 		   - clone fds (I chose it for now, it is the most universal
1671 		     solution)
1672 
1673 		   POSIX 1003.1g does not actually define this clearly
1674 		   at all. POSIX 1003.1g doesn't define a lot of things
1675 		   clearly however!
1676 
1677 		*/
1678 		if (UNIXCB(skb).fp)
1679 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1680 	}
1681 	err = size;
1682 
1683 	scm_recv(sock, msg, siocb->scm, flags);
1684 
	/* The success path falls through the cleanup labels as well. */
1685 out_free:
1686 	skb_free_datagram(sk,skb);
1687 out_unlock:
1688 	mutex_unlock(&u->readlock);
1689 out:
1690 	return err;
1691 }
1692 
1693 /*
1694  *	Sleep until data has arrived. But check for races..
1695  */
1696 
/*
 * Block the caller on sk->sk_sleep until one of the following holds: the
 * receive queue is non-empty, sk_err is set, RCV_SHUTDOWN is set, a signal
 * is pending, or the timeout has run out.
 *
 * The conditions are tested with the socket state lock held; the lock is
 * dropped only around schedule_timeout(). SOCK_ASYNC_WAITDATA is set on the
 * socket for the duration of each sleep.
 *
 * Returns the remaining timeout as last reported by schedule_timeout() (or
 * the value passed in if no sleep was needed).
 */
1697 static long unix_stream_data_wait(struct sock * sk, long timeo)
1698 {
1699 	DEFINE_WAIT(wait);
1700 
1701 	unix_state_lock(sk);
1702 
1703 	for (;;) {
		/* Queue ourselves before testing, so a wakeup cannot be missed. */
1704 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1705 
1706 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1707 		    sk->sk_err ||
1708 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1709 		    signal_pending(current) ||
1710 		    !timeo)
1711 			break;
1712 
1713 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1714 		unix_state_unlock(sk);
1715 		timeo = schedule_timeout(timeo);
1716 		unix_state_lock(sk);
1717 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1718 	}
1719 
1720 	finish_wait(sk->sk_sleep, &wait);
1721 	unix_state_unlock(sk);
1722 	return timeo;
1723 }
1724 
1725 
1726 
/*
 * Receive from a connected stream socket.
 *
 * skbs are taken off sk_receive_queue one at a time and copied into
 * msg->msg_iov until size bytes have been delivered, the queue runs dry
 * with at least `target` bytes copied, or one of the break conditions in
 * the loop is hit (different sender credentials, passed files, MSG_PEEK,
 * partially consumed skb, copy fault).
 *
 * Returns the value of `copied` when it is non-zero (the byte count, or
 * -EFAULT if the very first copy faulted), otherwise err: -EINVAL unless
 * the socket is TCP_ESTABLISHED, -EOPNOTSUPP for MSG_OOB, -EAGAIN when
 * nothing is queued and the timeout is zero, the sock_error() value, the
 * sock_intr_errno() value after a signal, or 0 after receive shutdown.
 */
1727 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1728 			       struct msghdr *msg, size_t size,
1729 			       int flags)
1730 {
1731 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1732 	struct scm_cookie tmp_scm;
1733 	struct sock *sk = sock->sk;
1734 	struct unix_sock *u = unix_sk(sk);
1735 	struct sockaddr_un *sunaddr=msg->msg_name;
1736 	int copied = 0;
1737 	int check_creds = 0;
1738 	int target;
1739 	int err = 0;
1740 	long timeo;
1741 
1742 	err = -EINVAL;
1743 	if (sk->sk_state != TCP_ESTABLISHED)
1744 		goto out;
1745 
1746 	err = -EOPNOTSUPP;
1747 	if (flags&MSG_OOB)
1748 		goto out;
1749 
	/* target: minimum number of bytes to gather before returning. */
1750 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1751 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1752 
1753 	msg->msg_namelen = 0;
1754 
1755 	/* Lock the socket to prevent queue disordering
1756 	 * while sleeps in memcpy_tomsg
1757 	 */
1758 
	/* Use a zeroed on-stack cookie when the caller did not supply one. */
1759 	if (!siocb->scm) {
1760 		siocb->scm = &tmp_scm;
1761 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1762 	}
1763 
1764 	mutex_lock(&u->readlock);
1765 
1766 	do
1767 	{
1768 		int chunk;
1769 		struct sk_buff *skb;
1770 
1771 		unix_state_lock(sk);
1772 		skb = skb_dequeue(&sk->sk_receive_queue);
1773 		if (skb==NULL)
1774 		{
			/* Queue empty: done if enough was gathered already. */
1775 			if (copied >= target)
1776 				goto unlock;
1777 
1778 			/*
1779 			 *	POSIX 1003.1g mandates this order.
1780 			 */
1781 
1782 			if ((err = sock_error(sk)) != 0)
1783 				goto unlock;
1784 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1785 				goto unlock;
1786 
1787 			unix_state_unlock(sk);
1788 			err = -EAGAIN;
1789 			if (!timeo)
1790 				break;
			/* readlock is not held across the sleep. */
1791 			mutex_unlock(&u->readlock);
1792 
1793 			timeo = unix_stream_data_wait(sk, timeo);
1794 
			/*
			 * The mutex is already released here, hence the jump
			 * straight to out (which also bypasses scm_recv()).
			 */
1795 			if (signal_pending(current)) {
1796 				err = sock_intr_errno(timeo);
1797 				goto out;
1798 			}
1799 			mutex_lock(&u->readlock);
1800 			continue;
			/* Shared exit for the paths above that still hold the state lock. */
1801  unlock:
1802 			unix_state_unlock(sk);
1803 			break;
1804 		}
1805 		unix_state_unlock(sk);
1806 
1807 		if (check_creds) {
1808 			/* Never glue messages from different writers */
1809 			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1810 				skb_queue_head(&sk->sk_receive_queue, skb);
1811 				break;
1812 			}
1813 		} else {
1814 			/* Copy credentials */
1815 			siocb->scm->creds = *UNIXCREDS(skb);
1816 			check_creds = 1;
1817 		}
1818 
1819 		/* Copy address just once */
1820 		if (sunaddr)
1821 		{
1822 			unix_copy_addr(msg, skb->sk);
1823 			sunaddr = NULL;
1824 		}
1825 
1826 		chunk = min_t(unsigned int, skb->len, size);
		/* On a copy fault the skb goes back to the head of the queue. */
1827 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1828 			skb_queue_head(&sk->sk_receive_queue, skb);
1829 			if (copied == 0)
1830 				copied = -EFAULT;
1831 			break;
1832 		}
1833 		copied += chunk;
1834 		size -= chunk;
1835 
1836 		/* Mark read part of skb as used */
1837 		if (!(flags & MSG_PEEK))
1838 		{
1839 			skb_pull(skb, chunk);
1840 
1841 			if (UNIXCB(skb).fp)
1842 				unix_detach_fds(siocb->scm, skb);
1843 
1844 			/* put the skb back if we didn't use it up.. */
1845 			if (skb->len)
1846 			{
1847 				skb_queue_head(&sk->sk_receive_queue, skb);
1848 				break;
1849 			}
1850 
1851 			kfree_skb(skb);
1852 
			/* Stop after an skb that carried files. */
1853 			if (siocb->scm->fp)
1854 				break;
1855 		}
1856 		else
1857 		{
1858 			/* It is questionable, see note in unix_dgram_recvmsg.
1859 			 */
1860 			if (UNIXCB(skb).fp)
1861 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1862 
1863 			/* put message back and return */
1864 			skb_queue_head(&sk->sk_receive_queue, skb);
1865 			break;
1866 		}
1867 	} while (size);
1868 
1869 	mutex_unlock(&u->readlock);
1870 	scm_recv(sock, msg, siocb->scm, flags);
1871 out:
1872 	return copied ? : err;
1873 }
1874 
1875 static int unix_shutdown(struct socket *sock, int mode)
1876 {
1877 	struct sock *sk = sock->sk;
1878 	struct sock *other;
1879 
1880 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1881 
1882 	if (mode) {
1883 		unix_state_lock(sk);
1884 		sk->sk_shutdown |= mode;
1885 		other=unix_peer(sk);
1886 		if (other)
1887 			sock_hold(other);
1888 		unix_state_unlock(sk);
1889 		sk->sk_state_change(sk);
1890 
1891 		if (other &&
1892 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1893 
1894 			int peer_mode = 0;
1895 
1896 			if (mode&RCV_SHUTDOWN)
1897 				peer_mode |= SEND_SHUTDOWN;
1898 			if (mode&SEND_SHUTDOWN)
1899 				peer_mode |= RCV_SHUTDOWN;
1900 			unix_state_lock(other);
1901 			other->sk_shutdown |= peer_mode;
1902 			unix_state_unlock(other);
1903 			other->sk_state_change(other);
1904 			read_lock(&other->sk_callback_lock);
1905 			if (peer_mode == SHUTDOWN_MASK)
1906 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1907 			else if (peer_mode & RCV_SHUTDOWN)
1908 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1909 			read_unlock(&other->sk_callback_lock);
1910 		}
1911 		if (other)
1912 			sock_put(other);
1913 	}
1914 	return 0;
1915 }
1916 
1917 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1918 {
1919 	struct sock *sk = sock->sk;
1920 	long amount=0;
1921 	int err;
1922 
1923 	switch(cmd)
1924 	{
1925 		case SIOCOUTQ:
1926 			amount = atomic_read(&sk->sk_wmem_alloc);
1927 			err = put_user(amount, (int __user *)arg);
1928 			break;
1929 		case SIOCINQ:
1930 		{
1931 			struct sk_buff *skb;
1932 
1933 			if (sk->sk_state == TCP_LISTEN) {
1934 				err = -EINVAL;
1935 				break;
1936 			}
1937 
1938 			spin_lock(&sk->sk_receive_queue.lock);
1939 			if (sk->sk_type == SOCK_STREAM ||
1940 			    sk->sk_type == SOCK_SEQPACKET) {
1941 				skb_queue_walk(&sk->sk_receive_queue, skb)
1942 					amount += skb->len;
1943 			} else {
1944 				skb = skb_peek(&sk->sk_receive_queue);
1945 				if (skb)
1946 					amount=skb->len;
1947 			}
1948 			spin_unlock(&sk->sk_receive_queue.lock);
1949 			err = put_user(amount, (int __user *)arg);
1950 			break;
1951 		}
1952 
1953 		default:
1954 			err = -ENOIOCTLCMD;
1955 			break;
1956 	}
1957 	return err;
1958 }
1959 
1960 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1961 {
1962 	struct sock *sk = sock->sk;
1963 	unsigned int mask;
1964 
1965 	poll_wait(file, sk->sk_sleep, wait);
1966 	mask = 0;
1967 
1968 	/* exceptional events? */
1969 	if (sk->sk_err)
1970 		mask |= POLLERR;
1971 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1972 		mask |= POLLHUP;
1973 	if (sk->sk_shutdown & RCV_SHUTDOWN)
1974 		mask |= POLLRDHUP;
1975 
1976 	/* readable? */
1977 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1978 	    (sk->sk_shutdown & RCV_SHUTDOWN))
1979 		mask |= POLLIN | POLLRDNORM;
1980 
1981 	/* Connection-based need to check for termination and startup */
1982 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1983 		mask |= POLLHUP;
1984 
1985 	/*
1986 	 * we set writable also when the other side has shut down the
1987 	 * connection. This prevents stuck sockets.
1988 	 */
1989 	if (unix_writable(sk))
1990 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1991 
1992 	return mask;
1993 }
1994 
1995 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
1996 				    poll_table *wait)
1997 {
1998 	struct sock *sk = sock->sk, *other;
1999 	unsigned int mask, writable;
2000 
2001 	poll_wait(file, sk->sk_sleep, wait);
2002 	mask = 0;
2003 
2004 	/* exceptional events? */
2005 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2006 		mask |= POLLERR;
2007 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2008 		mask |= POLLRDHUP;
2009 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2010 		mask |= POLLHUP;
2011 
2012 	/* readable? */
2013 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2014 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2015 		mask |= POLLIN | POLLRDNORM;
2016 
2017 	/* Connection-based need to check for termination and startup */
2018 	if (sk->sk_type == SOCK_SEQPACKET) {
2019 		if (sk->sk_state == TCP_CLOSE)
2020 			mask |= POLLHUP;
2021 		/* connection hasn't started yet? */
2022 		if (sk->sk_state == TCP_SYN_SENT)
2023 			return mask;
2024 	}
2025 
2026 	/* writable? */
2027 	writable = unix_writable(sk);
2028 	if (writable) {
2029 		other = unix_peer_get(sk);
2030 		if (other) {
2031 			if (unix_peer(other) != sk) {
2032 				poll_wait(file, &unix_sk(other)->peer_wait,
2033 					  wait);
2034 				if (unix_recvq_full(other))
2035 					writable = 0;
2036 			}
2037 
2038 			sock_put(other);
2039 		}
2040 	}
2041 
2042 	if (writable)
2043 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2044 	else
2045 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2046 
2047 	return mask;
2048 }
2049 
2050 #ifdef CONFIG_PROC_FS
2051 static struct sock *first_unix_socket(int *i)
2052 {
2053 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2054 		if (!hlist_empty(&unix_socket_table[*i]))
2055 			return __sk_head(&unix_socket_table[*i]);
2056 	}
2057 	return NULL;
2058 }
2059 
2060 static struct sock *next_unix_socket(int *i, struct sock *s)
2061 {
2062 	struct sock *next = sk_next(s);
2063 	/* More in this chain? */
2064 	if (next)
2065 		return next;
2066 	/* Look for next non-empty chain. */
2067 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2068 		if (!hlist_empty(&unix_socket_table[*i]))
2069 			return __sk_head(&unix_socket_table[*i]);
2070 	}
2071 	return NULL;
2072 }
2073 
/*
 * Per-open-file iteration state for the seq_file walk over
 * unix_socket_table; unix_seq_open() passes its size to seq_open_net() and
 * the iterator callbacks reach it through seq->private.
 */
2074 struct unix_iter_state {
2075 	struct seq_net_private p;	/* NOTE(review): presumably must stay first for seq_open_net()/seq_file_net() - confirm */
2076 	int i;				/* index of the hash chain the walk is currently in */
2077 };
2078 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2079 {
2080 	struct unix_iter_state *iter = seq->private;
2081 	loff_t off = 0;
2082 	struct sock *s;
2083 
2084 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2085 		if (sock_net(s) != seq_file_net(seq))
2086 			continue;
2087 		if (off == pos)
2088 			return s;
2089 		++off;
2090 	}
2091 	return NULL;
2092 }
2093 
2094 
2095 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2096 	__acquires(unix_table_lock)
2097 {
2098 	spin_lock(&unix_table_lock);
2099 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2100 }
2101 
2102 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2103 {
2104 	struct unix_iter_state *iter = seq->private;
2105 	struct sock *sk = v;
2106 	++*pos;
2107 
2108 	if (v == SEQ_START_TOKEN)
2109 		sk = first_unix_socket(&iter->i);
2110 	else
2111 		sk = next_unix_socket(&iter->i, sk);
2112 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2113 		sk = next_unix_socket(&iter->i, sk);
2114 	return sk;
2115 }
2116 
2117 static void unix_seq_stop(struct seq_file *seq, void *v)
2118 	__releases(unix_table_lock)
2119 {
2120 	spin_unlock(&unix_table_lock);
2121 }
2122 
2123 static int unix_seq_show(struct seq_file *seq, void *v)
2124 {
2125 
2126 	if (v == SEQ_START_TOKEN)
2127 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2128 			 "Inode Path\n");
2129 	else {
2130 		struct sock *s = v;
2131 		struct unix_sock *u = unix_sk(s);
2132 		unix_state_lock(s);
2133 
2134 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2135 			s,
2136 			atomic_read(&s->sk_refcnt),
2137 			0,
2138 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2139 			s->sk_type,
2140 			s->sk_socket ?
2141 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2142 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2143 			sock_i_ino(s));
2144 
2145 		if (u->addr) {
2146 			int i, len;
2147 			seq_putc(seq, ' ');
2148 
2149 			i = 0;
2150 			len = u->addr->len - sizeof(short);
2151 			if (!UNIX_ABSTRACT(s))
2152 				len--;
2153 			else {
2154 				seq_putc(seq, '@');
2155 				i++;
2156 			}
2157 			for ( ; i < len; i++)
2158 				seq_putc(seq, u->addr->name->sun_path[i]);
2159 		}
2160 		unix_state_unlock(s);
2161 		seq_putc(seq, '\n');
2162 	}
2163 
2164 	return 0;
2165 }
2166 
2167 static const struct seq_operations unix_seq_ops = {
2168 	.start  = unix_seq_start,
2169 	.next   = unix_seq_next,
2170 	.stop   = unix_seq_stop,
2171 	.show   = unix_seq_show,
2172 };
2173 
2174 
2175 static int unix_seq_open(struct inode *inode, struct file *file)
2176 {
2177 	return seq_open_net(inode, file, &unix_seq_ops,
2178 			    sizeof(struct unix_iter_state));
2179 }
2180 
2181 static const struct file_operations unix_seq_fops = {
2182 	.owner		= THIS_MODULE,
2183 	.open		= unix_seq_open,
2184 	.read		= seq_read,
2185 	.llseek		= seq_lseek,
2186 	.release	= seq_release_net,
2187 };
2188 
2189 #endif
2190 
2191 static struct net_proto_family unix_family_ops = {
2192 	.family = PF_UNIX,
2193 	.create = unix_create,
2194 	.owner	= THIS_MODULE,
2195 };
2196 
2197 
2198 static int unix_net_init(struct net *net)
2199 {
2200 	int error = -ENOMEM;
2201 
2202 	net->unx.sysctl_max_dgram_qlen = 10;
2203 	if (unix_sysctl_register(net))
2204 		goto out;
2205 
2206 #ifdef CONFIG_PROC_FS
2207 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2208 		unix_sysctl_unregister(net);
2209 		goto out;
2210 	}
2211 #endif
2212 	error = 0;
2213 out:
2214 	return 0;
2215 }
2216 
/*
 * Per-network-namespace teardown (pernet ->exit): unregister the sysctl
 * table, then remove the "unix" proc net entry.
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2222 
2223 static struct pernet_operations unix_net_ops = {
2224 	.init = unix_net_init,
2225 	.exit = unix_net_exit,
2226 };
2227 
2228 static int __init af_unix_init(void)
2229 {
2230 	int rc = -1;
2231 	struct sk_buff *dummy_skb;
2232 
2233 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2234 
2235 	rc = proto_register(&unix_proto, 1);
2236 	if (rc != 0) {
2237 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2238 		       __func__);
2239 		goto out;
2240 	}
2241 
2242 	sock_register(&unix_family_ops);
2243 	register_pernet_subsys(&unix_net_ops);
2244 out:
2245 	return rc;
2246 }
2247 
2248 static void __exit af_unix_exit(void)
2249 {
2250 	sock_unregister(PF_UNIX);
2251 	proto_unregister(&unix_proto);
2252 	unregister_pernet_subsys(&unix_net_ops);
2253 }
2254 
2255 /* Earlier than device_initcall() so that other drivers invoking
2256    request_module() don't end up in a loop when modprobe tries
2257    to use a UNIX socket. But later than subsys_initcall() because
2258    we depend on stuff initialised there */
/* Hook af_unix_init() in at fs_initcall level and af_unix_exit() at unload. */
2259 fs_initcall(af_unix_init);
2260 module_exit(af_unix_exit);
2261 
/*
 * Module metadata. NOTE(review): MODULE_ALIAS_NETPROTO(PF_UNIX) presumably
 * lets the module be auto-loaded when a PF_UNIX socket is requested -
 * confirm against the macro's definition.
 */
2262 MODULE_LICENSE("GPL");
2263 MODULE_ALIAS_NETPROTO(PF_UNIX);
2264