xref: /linux/net/unix/af_unix.c (revision 89fe5117928b2c1272c9376362131ded561c91ad)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan.cox@linux.org>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *		Linus Torvalds	:	Assorted bug cures.
15  *		Niibe Yutaka	:	async I/O support.
16  *		Carsten Paeth	:	PF_UNIX check, address fixes.
17  *		Alan Cox	:	Limit size of allocated blocks.
18  *		Alan Cox	:	Fixed the stupid socketpair bug.
19  *		Alan Cox	:	BSD compatibility fine tuning.
20  *		Alan Cox	:	Fixed a bug in connect when interrupted.
21  *		Alan Cox	:	Sorted out a proper draft version of
22  *					file descriptor passing hacked up from
23  *					Mike Shaver's work.
24  *		Marty Leisner	:	Fixes to fd passing
25  *		Nick Nevin	:	recvmsg bugfix.
26  *		Alan Cox	:	Started proper garbage collector
27  *		Heiko EiBfeldt	:	Missing verify_area check
28  *		Alan Cox	:	Started POSIXisms
29  *		Andreas Schwab	:	Replace inode by dentry for proper
30  *					reference counting
31  *		Kirk Petersen	:	Made this a module
32  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
33  *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
42  *					Security fix that limits the max
43  *					number of socks to 2*max_files and
44  *					the number of skb queueable in the
45  *					dgram receiver.
46  *		Artur Skawina   :	Hash function optimizations
47  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
48  *	      Malcolm Beattie   :	Set peercred for socketpair
49  *	     Michal Ostrowski   :       Module initialization cleanup.
50  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
51  *	     				the core infrastructure is doing that
52  *	     				for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *	[TO FIX]
58  *	ECONNREFUSED is not returned from one end of a connected() socket to the
59  *		other the moment one end closes.
60  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *	[NOT TO FIX]
63  *	accept() returns a path name even if the connecting socket has closed
64  *		in the meantime (BSD loses the path and gives up).
65  *	accept() returns 0 length path for an unbound connector. BSD returns 16
66  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *	BSD af_unix apparently has connect forgetting to block properly.
69  *		(need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *	Bug fixes and improvements.
73  *		- client shutdown killed server socket.
74  *		- removed all useless cli/sti pairs.
75  *
76  *	Semantic changes/extensions.
77  *		- generic control message passing.
78  *		- SCM_CREDENTIALS control message.
79  *		- "Abstract" (not FS based) socket bindings.
80  *		  Abstract names are sequences of bytes (not zero terminated)
81  *		  started by 0, so that this name space does not intersect
82  *		  with BSD names.
83  */
84 
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 
120 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
121 static DEFINE_SPINLOCK(unix_table_lock);
122 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
123 
124 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
125 
126 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
127 
/*
 * Security ID plumbing between the per-message scm cookie and the skb.
 *
 * unix_get_secdata(): copy the sender's secid from the scm cookie into the
 *                     skb (UNIXSID slot).
 * unix_set_secdata(): copy the secid stored in the skb back into the scm
 *                     cookie.
 *
 * Without CONFIG_SECURITY_NETWORK both helpers compile to nothing.  All four
 * variants are declared "static inline" so the two configurations agree.
 */
#ifdef CONFIG_SECURITY_NETWORK
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
145 
/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock;
 *    each socket's state is protected by a separate per-socket spinlock.
 */
151 
152 static inline unsigned unix_hash_fold(__wsum n)
153 {
154 	unsigned hash = (__force unsigned)n;
155 	hash ^= hash>>16;
156 	hash ^= hash>>8;
157 	return hash&(UNIX_HASH_SIZE-1);
158 }
159 
160 #define unix_peer(sk) (unix_sk(sk)->peer)
161 
162 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
163 {
164 	return unix_peer(osk) == sk;
165 }
166 
/*
 * @sk may send to @osk when @osk has no peer at all, or when its peer
 * is @sk itself.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return !unix_peer(osk) || unix_our_peer(sk, osk);
}
171 
172 static inline int unix_recvq_full(struct sock const *sk)
173 {
174 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
175 }
176 
177 static struct sock *unix_peer_get(struct sock *s)
178 {
179 	struct sock *peer;
180 
181 	unix_state_lock(s);
182 	peer = unix_peer(s);
183 	if (peer)
184 		sock_hold(peer);
185 	unix_state_unlock(s);
186 	return peer;
187 }
188 
189 static inline void unix_release_addr(struct unix_address *addr)
190 {
191 	if (atomic_dec_and_test(&addr->refcnt))
192 		kfree(addr);
193 }
194 
195 /*
196  *	Check unix socket name:
197  *		- should be not zero length.
198  *	        - if started by not zero, should be NULL terminated (FS object)
199  *		- if started by zero, it is abstract name.
200  */
201 
202 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
203 {
204 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
205 		return -EINVAL;
206 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
207 		return -EINVAL;
208 	if (sunaddr->sun_path[0]) {
209 		/*
210 		 * This may look like an off by one error but it is a bit more
211 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
212 		 * sun_path[108] doesnt as such exist.  However in kernel space
213 		 * we are guaranteed that it is a valid memory location in our
214 		 * kernel address buffer.
215 		 */
216 		((char *)sunaddr)[len]=0;
217 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
218 		return len;
219 	}
220 
221 	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
222 	return len;
223 }
224 
/*
 * Unlink @sk from whichever hash chain it is on.  The locked wrapper
 * unix_remove_socket() shows the expected locking: unix_table_lock held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
229 
/*
 * Link @sk onto hash chain @list.  @sk must not be hashed already (checked
 * with BUG_TRAP).  The locked wrapper unix_insert_socket() shows the
 * expected locking: unix_table_lock held.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));

	sk_add_node(sk, list);
}
235 
236 static inline void unix_remove_socket(struct sock *sk)
237 {
238 	spin_lock(&unix_table_lock);
239 	__unix_remove_socket(sk);
240 	spin_unlock(&unix_table_lock);
241 }
242 
243 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
244 {
245 	spin_lock(&unix_table_lock);
246 	__unix_insert_socket(list, sk);
247 	spin_unlock(&unix_table_lock);
248 }
249 
250 static struct sock *__unix_find_socket_byname(struct net *net,
251 					      struct sockaddr_un *sunname,
252 					      int len, int type, unsigned hash)
253 {
254 	struct sock *s;
255 	struct hlist_node *node;
256 
257 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
258 		struct unix_sock *u = unix_sk(s);
259 
260 		if (!net_eq(sock_net(s), net))
261 			continue;
262 
263 		if (u->addr->len == len &&
264 		    !memcmp(u->addr->name, sunname, len))
265 			goto found;
266 	}
267 	s = NULL;
268 found:
269 	return s;
270 }
271 
272 static inline struct sock *unix_find_socket_byname(struct net *net,
273 						   struct sockaddr_un *sunname,
274 						   int len, int type,
275 						   unsigned hash)
276 {
277 	struct sock *s;
278 
279 	spin_lock(&unix_table_lock);
280 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
281 	if (s)
282 		sock_hold(s);
283 	spin_unlock(&unix_table_lock);
284 	return s;
285 }
286 
287 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
288 {
289 	struct sock *s;
290 	struct hlist_node *node;
291 
292 	spin_lock(&unix_table_lock);
293 	sk_for_each(s, node,
294 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
295 		struct dentry *dentry = unix_sk(s)->dentry;
296 
297 		if (!net_eq(sock_net(s), net))
298 			continue;
299 
300 		if(dentry && dentry->d_inode == i)
301 		{
302 			sock_hold(s);
303 			goto found;
304 		}
305 	}
306 	s = NULL;
307 found:
308 	spin_unlock(&unix_table_lock);
309 	return s;
310 }
311 
312 static inline int unix_writable(struct sock *sk)
313 {
314 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
315 }
316 
317 static void unix_write_space(struct sock *sk)
318 {
319 	read_lock(&sk->sk_callback_lock);
320 	if (unix_writable(sk)) {
321 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
322 			wake_up_interruptible_sync(sk->sk_sleep);
323 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
324 	}
325 	read_unlock(&sk->sk_callback_lock);
326 }
327 
328 /* When dgram socket disconnects (or changes its peer), we clear its receive
329  * queue of packets arrived from previous peer. First, it allows to do
330  * flow control based only on wmem_alloc; second, sk connected to peer
331  * may receive messages only from that peer. */
332 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
333 {
334 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
335 		skb_queue_purge(&sk->sk_receive_queue);
336 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
337 
338 		/* If one link of bidirectional dgram pipe is disconnected,
339 		 * we signal error. Messages are lost. Do not make this,
340 		 * when peer was not connected to us.
341 		 */
342 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
343 			other->sk_err = ECONNRESET;
344 			other->sk_error_report(other);
345 		}
346 	}
347 }
348 
349 static void unix_sock_destructor(struct sock *sk)
350 {
351 	struct unix_sock *u = unix_sk(sk);
352 
353 	skb_queue_purge(&sk->sk_receive_queue);
354 
355 	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
356 	BUG_TRAP(sk_unhashed(sk));
357 	BUG_TRAP(!sk->sk_socket);
358 	if (!sock_flag(sk, SOCK_DEAD)) {
359 		printk("Attempt to release alive unix socket: %p\n", sk);
360 		return;
361 	}
362 
363 	if (u->addr)
364 		unix_release_addr(u->addr);
365 
366 	atomic_dec(&unix_nr_socks);
367 #ifdef UNIX_REFCNT_DEBUG
368 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
369 #endif
370 }
371 
372 static int unix_release_sock (struct sock *sk, int embrion)
373 {
374 	struct unix_sock *u = unix_sk(sk);
375 	struct dentry *dentry;
376 	struct vfsmount *mnt;
377 	struct sock *skpair;
378 	struct sk_buff *skb;
379 	int state;
380 
381 	unix_remove_socket(sk);
382 
383 	/* Clear state */
384 	unix_state_lock(sk);
385 	sock_orphan(sk);
386 	sk->sk_shutdown = SHUTDOWN_MASK;
387 	dentry	     = u->dentry;
388 	u->dentry    = NULL;
389 	mnt	     = u->mnt;
390 	u->mnt	     = NULL;
391 	state = sk->sk_state;
392 	sk->sk_state = TCP_CLOSE;
393 	unix_state_unlock(sk);
394 
395 	wake_up_interruptible_all(&u->peer_wait);
396 
397 	skpair=unix_peer(sk);
398 
399 	if (skpair!=NULL) {
400 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
401 			unix_state_lock(skpair);
402 			/* No more writes */
403 			skpair->sk_shutdown = SHUTDOWN_MASK;
404 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
405 				skpair->sk_err = ECONNRESET;
406 			unix_state_unlock(skpair);
407 			skpair->sk_state_change(skpair);
408 			read_lock(&skpair->sk_callback_lock);
409 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
410 			read_unlock(&skpair->sk_callback_lock);
411 		}
412 		sock_put(skpair); /* It may now die */
413 		unix_peer(sk) = NULL;
414 	}
415 
416 	/* Try to flush out this socket. Throw out buffers at least */
417 
418 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
419 		if (state==TCP_LISTEN)
420 			unix_release_sock(skb->sk, 1);
421 		/* passed fds are erased in the kfree_skb hook	      */
422 		kfree_skb(skb);
423 	}
424 
425 	if (dentry) {
426 		dput(dentry);
427 		mntput(mnt);
428 	}
429 
430 	sock_put(sk);
431 
432 	/* ---- Socket is dead now and most probably destroyed ---- */
433 
434 	/*
435 	 * Fixme: BSD difference: In BSD all sockets connected to use get
436 	 *	  ECONNRESET and we die on the spot. In Linux we behave
437 	 *	  like files and pipes do and wait for the last
438 	 *	  dereference.
439 	 *
440 	 * Can't we simply set sock->err?
441 	 *
442 	 *	  What the above comment does talk about? --ANK(980817)
443 	 */
444 
445 	if (unix_tot_inflight)
446 		unix_gc();		/* Garbage collect fds */
447 
448 	return 0;
449 }
450 
451 static int unix_listen(struct socket *sock, int backlog)
452 {
453 	int err;
454 	struct sock *sk = sock->sk;
455 	struct unix_sock *u = unix_sk(sk);
456 
457 	err = -EOPNOTSUPP;
458 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
459 		goto out;			/* Only stream/seqpacket sockets accept */
460 	err = -EINVAL;
461 	if (!u->addr)
462 		goto out;			/* No listens on an unbound socket */
463 	unix_state_lock(sk);
464 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
465 		goto out_unlock;
466 	if (backlog > sk->sk_max_ack_backlog)
467 		wake_up_interruptible_all(&u->peer_wait);
468 	sk->sk_max_ack_backlog	= backlog;
469 	sk->sk_state		= TCP_LISTEN;
470 	/* set credentials so connect can copy them */
471 	sk->sk_peercred.pid	= task_tgid_vnr(current);
472 	sk->sk_peercred.uid	= current->euid;
473 	sk->sk_peercred.gid	= current->egid;
474 	err = 0;
475 
476 out_unlock:
477 	unix_state_unlock(sk);
478 out:
479 	return err;
480 }
481 
482 static int unix_release(struct socket *);
483 static int unix_bind(struct socket *, struct sockaddr *, int);
484 static int unix_stream_connect(struct socket *, struct sockaddr *,
485 			       int addr_len, int flags);
486 static int unix_socketpair(struct socket *, struct socket *);
487 static int unix_accept(struct socket *, struct socket *, int);
488 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490 static unsigned int unix_dgram_poll(struct file *, struct socket *,
491 				    poll_table *);
492 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493 static int unix_shutdown(struct socket *, int);
494 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495 			       struct msghdr *, size_t);
496 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497 			       struct msghdr *, size_t, int);
498 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499 			      struct msghdr *, size_t);
500 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501 			      struct msghdr *, size_t, int);
502 static int unix_dgram_connect(struct socket *, struct sockaddr *,
503 			      int, int);
504 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505 				  struct msghdr *, size_t);
506 
507 static const struct proto_ops unix_stream_ops = {
508 	.family =	PF_UNIX,
509 	.owner =	THIS_MODULE,
510 	.release =	unix_release,
511 	.bind =		unix_bind,
512 	.connect =	unix_stream_connect,
513 	.socketpair =	unix_socketpair,
514 	.accept =	unix_accept,
515 	.getname =	unix_getname,
516 	.poll =		unix_poll,
517 	.ioctl =	unix_ioctl,
518 	.listen =	unix_listen,
519 	.shutdown =	unix_shutdown,
520 	.setsockopt =	sock_no_setsockopt,
521 	.getsockopt =	sock_no_getsockopt,
522 	.sendmsg =	unix_stream_sendmsg,
523 	.recvmsg =	unix_stream_recvmsg,
524 	.mmap =		sock_no_mmap,
525 	.sendpage =	sock_no_sendpage,
526 };
527 
528 static const struct proto_ops unix_dgram_ops = {
529 	.family =	PF_UNIX,
530 	.owner =	THIS_MODULE,
531 	.release =	unix_release,
532 	.bind =		unix_bind,
533 	.connect =	unix_dgram_connect,
534 	.socketpair =	unix_socketpair,
535 	.accept =	sock_no_accept,
536 	.getname =	unix_getname,
537 	.poll =		unix_dgram_poll,
538 	.ioctl =	unix_ioctl,
539 	.listen =	sock_no_listen,
540 	.shutdown =	unix_shutdown,
541 	.setsockopt =	sock_no_setsockopt,
542 	.getsockopt =	sock_no_getsockopt,
543 	.sendmsg =	unix_dgram_sendmsg,
544 	.recvmsg =	unix_dgram_recvmsg,
545 	.mmap =		sock_no_mmap,
546 	.sendpage =	sock_no_sendpage,
547 };
548 
549 static const struct proto_ops unix_seqpacket_ops = {
550 	.family =	PF_UNIX,
551 	.owner =	THIS_MODULE,
552 	.release =	unix_release,
553 	.bind =		unix_bind,
554 	.connect =	unix_stream_connect,
555 	.socketpair =	unix_socketpair,
556 	.accept =	unix_accept,
557 	.getname =	unix_getname,
558 	.poll =		unix_dgram_poll,
559 	.ioctl =	unix_ioctl,
560 	.listen =	unix_listen,
561 	.shutdown =	unix_shutdown,
562 	.setsockopt =	sock_no_setsockopt,
563 	.getsockopt =	sock_no_getsockopt,
564 	.sendmsg =	unix_seqpacket_sendmsg,
565 	.recvmsg =	unix_dgram_recvmsg,
566 	.mmap =		sock_no_mmap,
567 	.sendpage =	sock_no_sendpage,
568 };
569 
570 static struct proto unix_proto = {
571 	.name	  = "UNIX",
572 	.owner	  = THIS_MODULE,
573 	.obj_size = sizeof(struct unix_sock),
574 };
575 
576 /*
577  * AF_UNIX sockets do not interact with hardware, hence they
578  * dont trigger interrupts - so it's safe for them to have
579  * bh-unsafe locking for their sk_receive_queue.lock. Split off
580  * this special lock-class by reinitializing the spinlock key:
581  */
582 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583 
584 static struct sock * unix_create1(struct net *net, struct socket *sock)
585 {
586 	struct sock *sk = NULL;
587 	struct unix_sock *u;
588 
589 	atomic_inc(&unix_nr_socks);
590 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591 		goto out;
592 
593 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594 	if (!sk)
595 		goto out;
596 
597 	sock_init_data(sock,sk);
598 	lockdep_set_class(&sk->sk_receive_queue.lock,
599 				&af_unix_sk_receive_queue_lock_key);
600 
601 	sk->sk_write_space	= unix_write_space;
602 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603 	sk->sk_destruct		= unix_sock_destructor;
604 	u	  = unix_sk(sk);
605 	u->dentry = NULL;
606 	u->mnt	  = NULL;
607 	spin_lock_init(&u->lock);
608 	atomic_set(&u->inflight, 0);
609 	INIT_LIST_HEAD(&u->link);
610 	mutex_init(&u->readlock); /* single task reading lock */
611 	init_waitqueue_head(&u->peer_wait);
612 	unix_insert_socket(unix_sockets_unbound, sk);
613 out:
614 	if (sk == NULL)
615 		atomic_dec(&unix_nr_socks);
616 	return sk;
617 }
618 
619 static int unix_create(struct net *net, struct socket *sock, int protocol)
620 {
621 	if (protocol && protocol != PF_UNIX)
622 		return -EPROTONOSUPPORT;
623 
624 	sock->state = SS_UNCONNECTED;
625 
626 	switch (sock->type) {
627 	case SOCK_STREAM:
628 		sock->ops = &unix_stream_ops;
629 		break;
630 		/*
631 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
632 		 *	nothing uses it.
633 		 */
634 	case SOCK_RAW:
635 		sock->type=SOCK_DGRAM;
636 	case SOCK_DGRAM:
637 		sock->ops = &unix_dgram_ops;
638 		break;
639 	case SOCK_SEQPACKET:
640 		sock->ops = &unix_seqpacket_ops;
641 		break;
642 	default:
643 		return -ESOCKTNOSUPPORT;
644 	}
645 
646 	return unix_create1(net, sock) ? 0 : -ENOMEM;
647 }
648 
649 static int unix_release(struct socket *sock)
650 {
651 	struct sock *sk = sock->sk;
652 
653 	if (!sk)
654 		return 0;
655 
656 	sock->sk = NULL;
657 
658 	return unix_release_sock (sk, 0);
659 }
660 
661 static int unix_autobind(struct socket *sock)
662 {
663 	struct sock *sk = sock->sk;
664 	struct net *net = sock_net(sk);
665 	struct unix_sock *u = unix_sk(sk);
666 	static u32 ordernum = 1;
667 	struct unix_address * addr;
668 	int err;
669 
670 	mutex_lock(&u->readlock);
671 
672 	err = 0;
673 	if (u->addr)
674 		goto out;
675 
676 	err = -ENOMEM;
677 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
678 	if (!addr)
679 		goto out;
680 
681 	addr->name->sun_family = AF_UNIX;
682 	atomic_set(&addr->refcnt, 1);
683 
684 retry:
685 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
686 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
687 
688 	spin_lock(&unix_table_lock);
689 	ordernum = (ordernum+1)&0xFFFFF;
690 
691 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
692 				      addr->hash)) {
693 		spin_unlock(&unix_table_lock);
694 		/* Sanity yield. It is unusual case, but yet... */
695 		if (!(ordernum&0xFF))
696 			yield();
697 		goto retry;
698 	}
699 	addr->hash ^= sk->sk_type;
700 
701 	__unix_remove_socket(sk);
702 	u->addr = addr;
703 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
704 	spin_unlock(&unix_table_lock);
705 	err = 0;
706 
707 out:	mutex_unlock(&u->readlock);
708 	return err;
709 }
710 
/*
 * Resolve a destination address to a socket.
 *
 * Abstract names (first path byte zero) are looked up by name and hash.
 * Filesystem names are resolved through the VFS: the caller needs write
 * permission on the path, the inode must be a socket, and a unix socket
 * must be bound to that inode.
 *
 * On success the sock is returned with a reference held and *@error is left
 * untouched.  On failure NULL is returned and *@error is set to the path
 * lookup or permission error, to -ECONNREFUSED when nothing is listening
 * there, or to -EPROTOTYPE when the socket found has a different type.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *peer;
	struct nameidata nd;
	int err;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		/* Abstract name space. */
		peer = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!peer) {
			err = -ECONNREFUSED;
			goto fail;
		}

		dentry = unix_sk(peer)->dentry;
		if (dentry)
			touch_atime(unix_sk(peer)->mnt, dentry);
		return peer;
	}

	/* Filesystem name. */
	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;
	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
		goto put_fail;
	peer = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
	if (!peer)
		goto put_fail;

	/* Only a usable match counts as an access to the socket file. */
	if (peer->sk_type == type)
		touch_atime(nd.path.mnt, nd.path.dentry);

	path_put(&nd.path);

	if (peer->sk_type != type) {
		sock_put(peer);
		err = -EPROTOTYPE;
		goto fail;
	}
	return peer;

put_fail:
	path_put(&nd.path);
fail:
	*error = err;
	return NULL;
}
763 
764 
765 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
766 {
767 	struct sock *sk = sock->sk;
768 	struct net *net = sock_net(sk);
769 	struct unix_sock *u = unix_sk(sk);
770 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
771 	struct dentry * dentry = NULL;
772 	struct nameidata nd;
773 	int err;
774 	unsigned hash;
775 	struct unix_address *addr;
776 	struct hlist_head *list;
777 
778 	err = -EINVAL;
779 	if (sunaddr->sun_family != AF_UNIX)
780 		goto out;
781 
782 	if (addr_len==sizeof(short)) {
783 		err = unix_autobind(sock);
784 		goto out;
785 	}
786 
787 	err = unix_mkname(sunaddr, addr_len, &hash);
788 	if (err < 0)
789 		goto out;
790 	addr_len = err;
791 
792 	mutex_lock(&u->readlock);
793 
794 	err = -EINVAL;
795 	if (u->addr)
796 		goto out_up;
797 
798 	err = -ENOMEM;
799 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
800 	if (!addr)
801 		goto out_up;
802 
803 	memcpy(addr->name, sunaddr, addr_len);
804 	addr->len = addr_len;
805 	addr->hash = hash ^ sk->sk_type;
806 	atomic_set(&addr->refcnt, 1);
807 
808 	if (sunaddr->sun_path[0]) {
809 		unsigned int mode;
810 		err = 0;
811 		/*
812 		 * Get the parent directory, calculate the hash for last
813 		 * component.
814 		 */
815 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
816 		if (err)
817 			goto out_mknod_parent;
818 
819 		dentry = lookup_create(&nd, 0);
820 		err = PTR_ERR(dentry);
821 		if (IS_ERR(dentry))
822 			goto out_mknod_unlock;
823 
824 		/*
825 		 * All right, let's create it.
826 		 */
827 		mode = S_IFSOCK |
828 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
829 		err = mnt_want_write(nd.path.mnt);
830 		if (err)
831 			goto out_mknod_dput;
832 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
833 		mnt_drop_write(nd.path.mnt);
834 		if (err)
835 			goto out_mknod_dput;
836 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
837 		dput(nd.path.dentry);
838 		nd.path.dentry = dentry;
839 
840 		addr->hash = UNIX_HASH_SIZE;
841 	}
842 
843 	spin_lock(&unix_table_lock);
844 
845 	if (!sunaddr->sun_path[0]) {
846 		err = -EADDRINUSE;
847 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
848 					      sk->sk_type, hash)) {
849 			unix_release_addr(addr);
850 			goto out_unlock;
851 		}
852 
853 		list = &unix_socket_table[addr->hash];
854 	} else {
855 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
856 		u->dentry = nd.path.dentry;
857 		u->mnt    = nd.path.mnt;
858 	}
859 
860 	err = 0;
861 	__unix_remove_socket(sk);
862 	u->addr = addr;
863 	__unix_insert_socket(list, sk);
864 
865 out_unlock:
866 	spin_unlock(&unix_table_lock);
867 out_up:
868 	mutex_unlock(&u->readlock);
869 out:
870 	return err;
871 
872 out_mknod_dput:
873 	dput(dentry);
874 out_mknod_unlock:
875 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
876 	path_put(&nd.path);
877 out_mknod_parent:
878 	if (err==-EEXIST)
879 		err=-EADDRINUSE;
880 	unix_release_addr(addr);
881 	goto out_up;
882 }
883 
/*
 * Take the state locks of two socks.  When both are distinct and non-NULL
 * the lower address is locked first so concurrent callers agree on the
 * order.  When @sk2 is NULL or the same sock as @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
898 
/*
 * Undo unix_state_double_lock(): @sk1 is always unlocked, @sk2 only when it
 * is a second, distinct sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
908 
909 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
910 			      int alen, int flags)
911 {
912 	struct sock *sk = sock->sk;
913 	struct net *net = sock_net(sk);
914 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
915 	struct sock *other;
916 	unsigned hash;
917 	int err;
918 
919 	if (addr->sa_family != AF_UNSPEC) {
920 		err = unix_mkname(sunaddr, alen, &hash);
921 		if (err < 0)
922 			goto out;
923 		alen = err;
924 
925 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
926 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
927 			goto out;
928 
929 restart:
930 		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
931 		if (!other)
932 			goto out;
933 
934 		unix_state_double_lock(sk, other);
935 
936 		/* Apparently VFS overslept socket death. Retry. */
937 		if (sock_flag(other, SOCK_DEAD)) {
938 			unix_state_double_unlock(sk, other);
939 			sock_put(other);
940 			goto restart;
941 		}
942 
943 		err = -EPERM;
944 		if (!unix_may_send(sk, other))
945 			goto out_unlock;
946 
947 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
948 		if (err)
949 			goto out_unlock;
950 
951 	} else {
952 		/*
953 		 *	1003.1g breaking connected state with AF_UNSPEC
954 		 */
955 		other = NULL;
956 		unix_state_double_lock(sk, other);
957 	}
958 
959 	/*
960 	 * If it was connected, reconnect.
961 	 */
962 	if (unix_peer(sk)) {
963 		struct sock *old_peer = unix_peer(sk);
964 		unix_peer(sk)=other;
965 		unix_state_double_unlock(sk, other);
966 
967 		if (other != old_peer)
968 			unix_dgram_disconnected(sk, old_peer);
969 		sock_put(old_peer);
970 	} else {
971 		unix_peer(sk)=other;
972 		unix_state_double_unlock(sk, other);
973 	}
974 	return 0;
975 
976 out_unlock:
977 	unix_state_double_unlock(sk, other);
978 	sock_put(other);
979 out:
980 	return err;
981 }
982 
983 static long unix_wait_for_peer(struct sock *other, long timeo)
984 {
985 	struct unix_sock *u = unix_sk(other);
986 	int sched;
987 	DEFINE_WAIT(wait);
988 
989 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
990 
991 	sched = !sock_flag(other, SOCK_DEAD) &&
992 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
993 		unix_recvq_full(other);
994 
995 	unix_state_unlock(other);
996 
997 	if (sched)
998 		timeo = schedule_timeout(timeo);
999 
1000 	finish_wait(&u->peer_wait, &wait);
1001 	return timeo;
1002 }
1003 
1004 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1005 			       int addr_len, int flags)
1006 {
1007 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1008 	struct sock *sk = sock->sk;
1009 	struct net *net = sock_net(sk);
1010 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1011 	struct sock *newsk = NULL;
1012 	struct sock *other = NULL;
1013 	struct sk_buff *skb = NULL;
1014 	unsigned hash;
1015 	int st;
1016 	int err;
1017 	long timeo;
1018 
1019 	err = unix_mkname(sunaddr, addr_len, &hash);
1020 	if (err < 0)
1021 		goto out;
1022 	addr_len = err;
1023 
1024 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1025 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1026 		goto out;
1027 
1028 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1029 
1030 	/* First of all allocate resources.
1031 	   If we will make it after state is locked,
1032 	   we will have to recheck all again in any case.
1033 	 */
1034 
1035 	err = -ENOMEM;
1036 
1037 	/* create new sock for complete connection */
1038 	newsk = unix_create1(sock_net(sk), NULL);
1039 	if (newsk == NULL)
1040 		goto out;
1041 
1042 	/* Allocate skb for sending to listening sock */
1043 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1044 	if (skb == NULL)
1045 		goto out;
1046 
1047 restart:
1048 	/*  Find listening sock. */
1049 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1050 	if (!other)
1051 		goto out;
1052 
1053 	/* Latch state of peer */
1054 	unix_state_lock(other);
1055 
1056 	/* Apparently VFS overslept socket death. Retry. */
1057 	if (sock_flag(other, SOCK_DEAD)) {
1058 		unix_state_unlock(other);
1059 		sock_put(other);
1060 		goto restart;
1061 	}
1062 
1063 	err = -ECONNREFUSED;
1064 	if (other->sk_state != TCP_LISTEN)
1065 		goto out_unlock;
1066 
1067 	if (unix_recvq_full(other)) {
1068 		err = -EAGAIN;
1069 		if (!timeo)
1070 			goto out_unlock;
1071 
1072 		timeo = unix_wait_for_peer(other, timeo);
1073 
1074 		err = sock_intr_errno(timeo);
1075 		if (signal_pending(current))
1076 			goto out;
1077 		sock_put(other);
1078 		goto restart;
1079 	}
1080 
1081 	/* Latch our state.
1082 
1083 	   It is tricky place. We need to grab write lock and cannot
1084 	   drop lock on peer. It is dangerous because deadlock is
1085 	   possible. Connect to self case and simultaneous
1086 	   attempt to connect are eliminated by checking socket
1087 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1088 	   check this before attempt to grab lock.
1089 
1090 	   Well, and we have to recheck the state after socket locked.
1091 	 */
1092 	st = sk->sk_state;
1093 
1094 	switch (st) {
1095 	case TCP_CLOSE:
1096 		/* This is ok... continue with connect */
1097 		break;
1098 	case TCP_ESTABLISHED:
1099 		/* Socket is already connected */
1100 		err = -EISCONN;
1101 		goto out_unlock;
1102 	default:
1103 		err = -EINVAL;
1104 		goto out_unlock;
1105 	}
1106 
1107 	unix_state_lock_nested(sk);
1108 
1109 	if (sk->sk_state != st) {
1110 		unix_state_unlock(sk);
1111 		unix_state_unlock(other);
1112 		sock_put(other);
1113 		goto restart;
1114 	}
1115 
1116 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1117 	if (err) {
1118 		unix_state_unlock(sk);
1119 		goto out_unlock;
1120 	}
1121 
1122 	/* The way is open! Fastly set all the necessary fields... */
1123 
1124 	sock_hold(sk);
1125 	unix_peer(newsk)	= sk;
1126 	newsk->sk_state		= TCP_ESTABLISHED;
1127 	newsk->sk_type		= sk->sk_type;
1128 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1129 	newsk->sk_peercred.uid	= current->euid;
1130 	newsk->sk_peercred.gid	= current->egid;
1131 	newu = unix_sk(newsk);
1132 	newsk->sk_sleep		= &newu->peer_wait;
1133 	otheru = unix_sk(other);
1134 
1135 	/* copy address information from listening to new sock*/
1136 	if (otheru->addr) {
1137 		atomic_inc(&otheru->addr->refcnt);
1138 		newu->addr = otheru->addr;
1139 	}
1140 	if (otheru->dentry) {
1141 		newu->dentry	= dget(otheru->dentry);
1142 		newu->mnt	= mntget(otheru->mnt);
1143 	}
1144 
1145 	/* Set credentials */
1146 	sk->sk_peercred = other->sk_peercred;
1147 
1148 	sock->state	= SS_CONNECTED;
1149 	sk->sk_state	= TCP_ESTABLISHED;
1150 	sock_hold(newsk);
1151 
1152 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1153 	unix_peer(sk)	= newsk;
1154 
1155 	unix_state_unlock(sk);
1156 
1157 	/* take ten and and send info to listening sock */
1158 	spin_lock(&other->sk_receive_queue.lock);
1159 	__skb_queue_tail(&other->sk_receive_queue, skb);
1160 	spin_unlock(&other->sk_receive_queue.lock);
1161 	unix_state_unlock(other);
1162 	other->sk_data_ready(other, 0);
1163 	sock_put(other);
1164 	return 0;
1165 
1166 out_unlock:
1167 	if (other)
1168 		unix_state_unlock(other);
1169 
1170 out:
1171 	if (skb)
1172 		kfree_skb(skb);
1173 	if (newsk)
1174 		unix_release_sock(newsk, 0);
1175 	if (other)
1176 		sock_put(other);
1177 	return err;
1178 }
1179 
1180 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1181 {
1182 	struct sock *ska=socka->sk, *skb = sockb->sk;
1183 
1184 	/* Join our sockets back to back */
1185 	sock_hold(ska);
1186 	sock_hold(skb);
1187 	unix_peer(ska)=skb;
1188 	unix_peer(skb)=ska;
1189 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1190 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1191 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1192 
1193 	if (ska->sk_type != SOCK_DGRAM) {
1194 		ska->sk_state = TCP_ESTABLISHED;
1195 		skb->sk_state = TCP_ESTABLISHED;
1196 		socka->state  = SS_CONNECTED;
1197 		sockb->state  = SS_CONNECTED;
1198 	}
1199 	return 0;
1200 }
1201 
1202 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1203 {
1204 	struct sock *sk = sock->sk;
1205 	struct sock *tsk;
1206 	struct sk_buff *skb;
1207 	int err;
1208 
1209 	err = -EOPNOTSUPP;
1210 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1211 		goto out;
1212 
1213 	err = -EINVAL;
1214 	if (sk->sk_state != TCP_LISTEN)
1215 		goto out;
1216 
1217 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1218 	 * so that no locks are necessary.
1219 	 */
1220 
1221 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1222 	if (!skb) {
1223 		/* This means receive shutdown. */
1224 		if (err == 0)
1225 			err = -EINVAL;
1226 		goto out;
1227 	}
1228 
1229 	tsk = skb->sk;
1230 	skb_free_datagram(sk, skb);
1231 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1232 
1233 	/* attach accepted sock to socket */
1234 	unix_state_lock(tsk);
1235 	newsock->state = SS_CONNECTED;
1236 	sock_graft(tsk, newsock);
1237 	unix_state_unlock(tsk);
1238 	return 0;
1239 
1240 out:
1241 	return err;
1242 }
1243 
1244 
1245 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1246 {
1247 	struct sock *sk = sock->sk;
1248 	struct unix_sock *u;
1249 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1250 	int err = 0;
1251 
1252 	if (peer) {
1253 		sk = unix_peer_get(sk);
1254 
1255 		err = -ENOTCONN;
1256 		if (!sk)
1257 			goto out;
1258 		err = 0;
1259 	} else {
1260 		sock_hold(sk);
1261 	}
1262 
1263 	u = unix_sk(sk);
1264 	unix_state_lock(sk);
1265 	if (!u->addr) {
1266 		sunaddr->sun_family = AF_UNIX;
1267 		sunaddr->sun_path[0] = 0;
1268 		*uaddr_len = sizeof(short);
1269 	} else {
1270 		struct unix_address *addr = u->addr;
1271 
1272 		*uaddr_len = addr->len;
1273 		memcpy(sunaddr, addr->name, *uaddr_len);
1274 	}
1275 	unix_state_unlock(sk);
1276 	sock_put(sk);
1277 out:
1278 	return err;
1279 }
1280 
1281 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1282 {
1283 	int i;
1284 
1285 	scm->fp = UNIXCB(skb).fp;
1286 	skb->destructor = sock_wfree;
1287 	UNIXCB(skb).fp = NULL;
1288 
1289 	for (i=scm->fp->count-1; i>=0; i--)
1290 		unix_notinflight(scm->fp->fp[i]);
1291 }
1292 
1293 static void unix_destruct_fds(struct sk_buff *skb)
1294 {
1295 	struct scm_cookie scm;
1296 	memset(&scm, 0, sizeof(scm));
1297 	unix_detach_fds(&scm, skb);
1298 
1299 	/* Alas, it calls VFS */
1300 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1301 	scm_destroy(&scm);
1302 	sock_wfree(skb);
1303 }
1304 
1305 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1306 {
1307 	int i;
1308 	for (i=scm->fp->count-1; i>=0; i--)
1309 		unix_inflight(scm->fp->fp[i]);
1310 	UNIXCB(skb).fp = scm->fp;
1311 	skb->destructor = unix_destruct_fds;
1312 	scm->fp = NULL;
1313 }
1314 
1315 /*
1316  *	Send AF_UNIX data.
1317  */
1318 
1319 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1320 			      struct msghdr *msg, size_t len)
1321 {
1322 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1323 	struct sock *sk = sock->sk;
1324 	struct net *net = sock_net(sk);
1325 	struct unix_sock *u = unix_sk(sk);
1326 	struct sockaddr_un *sunaddr=msg->msg_name;
1327 	struct sock *other = NULL;
1328 	int namelen = 0; /* fake GCC */
1329 	int err;
1330 	unsigned hash;
1331 	struct sk_buff *skb;
1332 	long timeo;
1333 	struct scm_cookie tmp_scm;
1334 
1335 	if (NULL == siocb->scm)
1336 		siocb->scm = &tmp_scm;
1337 	err = scm_send(sock, msg, siocb->scm);
1338 	if (err < 0)
1339 		return err;
1340 
1341 	err = -EOPNOTSUPP;
1342 	if (msg->msg_flags&MSG_OOB)
1343 		goto out;
1344 
1345 	if (msg->msg_namelen) {
1346 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1347 		if (err < 0)
1348 			goto out;
1349 		namelen = err;
1350 	} else {
1351 		sunaddr = NULL;
1352 		err = -ENOTCONN;
1353 		other = unix_peer_get(sk);
1354 		if (!other)
1355 			goto out;
1356 	}
1357 
1358 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1359 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1360 		goto out;
1361 
1362 	err = -EMSGSIZE;
1363 	if (len > sk->sk_sndbuf - 32)
1364 		goto out;
1365 
1366 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1367 	if (skb==NULL)
1368 		goto out;
1369 
1370 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1371 	if (siocb->scm->fp)
1372 		unix_attach_fds(siocb->scm, skb);
1373 	unix_get_secdata(siocb->scm, skb);
1374 
1375 	skb_reset_transport_header(skb);
1376 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1377 	if (err)
1378 		goto out_free;
1379 
1380 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1381 
1382 restart:
1383 	if (!other) {
1384 		err = -ECONNRESET;
1385 		if (sunaddr == NULL)
1386 			goto out_free;
1387 
1388 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1389 					hash, &err);
1390 		if (other==NULL)
1391 			goto out_free;
1392 	}
1393 
1394 	unix_state_lock(other);
1395 	err = -EPERM;
1396 	if (!unix_may_send(sk, other))
1397 		goto out_unlock;
1398 
1399 	if (sock_flag(other, SOCK_DEAD)) {
1400 		/*
1401 		 *	Check with 1003.1g - what should
1402 		 *	datagram error
1403 		 */
1404 		unix_state_unlock(other);
1405 		sock_put(other);
1406 
1407 		err = 0;
1408 		unix_state_lock(sk);
1409 		if (unix_peer(sk) == other) {
1410 			unix_peer(sk)=NULL;
1411 			unix_state_unlock(sk);
1412 
1413 			unix_dgram_disconnected(sk, other);
1414 			sock_put(other);
1415 			err = -ECONNREFUSED;
1416 		} else {
1417 			unix_state_unlock(sk);
1418 		}
1419 
1420 		other = NULL;
1421 		if (err)
1422 			goto out_free;
1423 		goto restart;
1424 	}
1425 
1426 	err = -EPIPE;
1427 	if (other->sk_shutdown & RCV_SHUTDOWN)
1428 		goto out_unlock;
1429 
1430 	if (sk->sk_type != SOCK_SEQPACKET) {
1431 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1432 		if (err)
1433 			goto out_unlock;
1434 	}
1435 
1436 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1437 		if (!timeo) {
1438 			err = -EAGAIN;
1439 			goto out_unlock;
1440 		}
1441 
1442 		timeo = unix_wait_for_peer(other, timeo);
1443 
1444 		err = sock_intr_errno(timeo);
1445 		if (signal_pending(current))
1446 			goto out_free;
1447 
1448 		goto restart;
1449 	}
1450 
1451 	skb_queue_tail(&other->sk_receive_queue, skb);
1452 	unix_state_unlock(other);
1453 	other->sk_data_ready(other, len);
1454 	sock_put(other);
1455 	scm_destroy(siocb->scm);
1456 	return len;
1457 
1458 out_unlock:
1459 	unix_state_unlock(other);
1460 out_free:
1461 	kfree_skb(skb);
1462 out:
1463 	if (other)
1464 		sock_put(other);
1465 	scm_destroy(siocb->scm);
1466 	return err;
1467 }
1468 
1469 
1470 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1471 			       struct msghdr *msg, size_t len)
1472 {
1473 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1474 	struct sock *sk = sock->sk;
1475 	struct sock *other = NULL;
1476 	struct sockaddr_un *sunaddr=msg->msg_name;
1477 	int err,size;
1478 	struct sk_buff *skb;
1479 	int sent=0;
1480 	struct scm_cookie tmp_scm;
1481 
1482 	if (NULL == siocb->scm)
1483 		siocb->scm = &tmp_scm;
1484 	err = scm_send(sock, msg, siocb->scm);
1485 	if (err < 0)
1486 		return err;
1487 
1488 	err = -EOPNOTSUPP;
1489 	if (msg->msg_flags&MSG_OOB)
1490 		goto out_err;
1491 
1492 	if (msg->msg_namelen) {
1493 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1494 		goto out_err;
1495 	} else {
1496 		sunaddr = NULL;
1497 		err = -ENOTCONN;
1498 		other = unix_peer(sk);
1499 		if (!other)
1500 			goto out_err;
1501 	}
1502 
1503 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1504 		goto pipe_err;
1505 
1506 	while(sent < len)
1507 	{
1508 		/*
1509 		 *	Optimisation for the fact that under 0.01% of X
1510 		 *	messages typically need breaking up.
1511 		 */
1512 
1513 		size = len-sent;
1514 
1515 		/* Keep two messages in the pipe so it schedules better */
1516 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1517 			size = (sk->sk_sndbuf >> 1) - 64;
1518 
1519 		if (size > SKB_MAX_ALLOC)
1520 			size = SKB_MAX_ALLOC;
1521 
1522 		/*
1523 		 *	Grab a buffer
1524 		 */
1525 
1526 		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1527 
1528 		if (skb==NULL)
1529 			goto out_err;
1530 
1531 		/*
1532 		 *	If you pass two values to the sock_alloc_send_skb
1533 		 *	it tries to grab the large buffer with GFP_NOFS
1534 		 *	(which can fail easily), and if it fails grab the
1535 		 *	fallback size buffer which is under a page and will
1536 		 *	succeed. [Alan]
1537 		 */
1538 		size = min_t(int, size, skb_tailroom(skb));
1539 
1540 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1541 		if (siocb->scm->fp)
1542 			unix_attach_fds(siocb->scm, skb);
1543 
1544 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1545 			kfree_skb(skb);
1546 			goto out_err;
1547 		}
1548 
1549 		unix_state_lock(other);
1550 
1551 		if (sock_flag(other, SOCK_DEAD) ||
1552 		    (other->sk_shutdown & RCV_SHUTDOWN))
1553 			goto pipe_err_free;
1554 
1555 		skb_queue_tail(&other->sk_receive_queue, skb);
1556 		unix_state_unlock(other);
1557 		other->sk_data_ready(other, size);
1558 		sent+=size;
1559 	}
1560 
1561 	scm_destroy(siocb->scm);
1562 	siocb->scm = NULL;
1563 
1564 	return sent;
1565 
1566 pipe_err_free:
1567 	unix_state_unlock(other);
1568 	kfree_skb(skb);
1569 pipe_err:
1570 	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1571 		send_sig(SIGPIPE,current,0);
1572 	err = -EPIPE;
1573 out_err:
1574 	scm_destroy(siocb->scm);
1575 	siocb->scm = NULL;
1576 	return sent ? : err;
1577 }
1578 
1579 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1580 				  struct msghdr *msg, size_t len)
1581 {
1582 	int err;
1583 	struct sock *sk = sock->sk;
1584 
1585 	err = sock_error(sk);
1586 	if (err)
1587 		return err;
1588 
1589 	if (sk->sk_state != TCP_ESTABLISHED)
1590 		return -ENOTCONN;
1591 
1592 	if (msg->msg_namelen)
1593 		msg->msg_namelen = 0;
1594 
1595 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1596 }
1597 
1598 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1599 {
1600 	struct unix_sock *u = unix_sk(sk);
1601 
1602 	msg->msg_namelen = 0;
1603 	if (u->addr) {
1604 		msg->msg_namelen = u->addr->len;
1605 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1606 	}
1607 }
1608 
1609 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1610 			      struct msghdr *msg, size_t size,
1611 			      int flags)
1612 {
1613 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1614 	struct scm_cookie tmp_scm;
1615 	struct sock *sk = sock->sk;
1616 	struct unix_sock *u = unix_sk(sk);
1617 	int noblock = flags & MSG_DONTWAIT;
1618 	struct sk_buff *skb;
1619 	int err;
1620 
1621 	err = -EOPNOTSUPP;
1622 	if (flags&MSG_OOB)
1623 		goto out;
1624 
1625 	msg->msg_namelen = 0;
1626 
1627 	mutex_lock(&u->readlock);
1628 
1629 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1630 	if (!skb) {
1631 		unix_state_lock(sk);
1632 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1633 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1634 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1635 			err = 0;
1636 		unix_state_unlock(sk);
1637 		goto out_unlock;
1638 	}
1639 
1640 	wake_up_interruptible_sync(&u->peer_wait);
1641 
1642 	if (msg->msg_name)
1643 		unix_copy_addr(msg, skb->sk);
1644 
1645 	if (size > skb->len)
1646 		size = skb->len;
1647 	else if (size < skb->len)
1648 		msg->msg_flags |= MSG_TRUNC;
1649 
1650 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1651 	if (err)
1652 		goto out_free;
1653 
1654 	if (!siocb->scm) {
1655 		siocb->scm = &tmp_scm;
1656 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1657 	}
1658 	siocb->scm->creds = *UNIXCREDS(skb);
1659 	unix_set_secdata(siocb->scm, skb);
1660 
1661 	if (!(flags & MSG_PEEK))
1662 	{
1663 		if (UNIXCB(skb).fp)
1664 			unix_detach_fds(siocb->scm, skb);
1665 	}
1666 	else
1667 	{
1668 		/* It is questionable: on PEEK we could:
1669 		   - do not return fds - good, but too simple 8)
1670 		   - return fds, and do not return them on read (old strategy,
1671 		     apparently wrong)
1672 		   - clone fds (I chose it for now, it is the most universal
1673 		     solution)
1674 
1675 		   POSIX 1003.1g does not actually define this clearly
1676 		   at all. POSIX 1003.1g doesn't define a lot of things
1677 		   clearly however!
1678 
1679 		*/
1680 		if (UNIXCB(skb).fp)
1681 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1682 	}
1683 	err = size;
1684 
1685 	scm_recv(sock, msg, siocb->scm, flags);
1686 
1687 out_free:
1688 	skb_free_datagram(sk,skb);
1689 out_unlock:
1690 	mutex_unlock(&u->readlock);
1691 out:
1692 	return err;
1693 }
1694 
1695 /*
1696  *	Sleep until data has arrive. But check for races..
1697  */
1698 
1699 static long unix_stream_data_wait(struct sock * sk, long timeo)
1700 {
1701 	DEFINE_WAIT(wait);
1702 
1703 	unix_state_lock(sk);
1704 
1705 	for (;;) {
1706 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1707 
1708 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1709 		    sk->sk_err ||
1710 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1711 		    signal_pending(current) ||
1712 		    !timeo)
1713 			break;
1714 
1715 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1716 		unix_state_unlock(sk);
1717 		timeo = schedule_timeout(timeo);
1718 		unix_state_lock(sk);
1719 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1720 	}
1721 
1722 	finish_wait(sk->sk_sleep, &wait);
1723 	unix_state_unlock(sk);
1724 	return timeo;
1725 }
1726 
1727 
1728 
1729 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1730 			       struct msghdr *msg, size_t size,
1731 			       int flags)
1732 {
1733 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1734 	struct scm_cookie tmp_scm;
1735 	struct sock *sk = sock->sk;
1736 	struct unix_sock *u = unix_sk(sk);
1737 	struct sockaddr_un *sunaddr=msg->msg_name;
1738 	int copied = 0;
1739 	int check_creds = 0;
1740 	int target;
1741 	int err = 0;
1742 	long timeo;
1743 
1744 	err = -EINVAL;
1745 	if (sk->sk_state != TCP_ESTABLISHED)
1746 		goto out;
1747 
1748 	err = -EOPNOTSUPP;
1749 	if (flags&MSG_OOB)
1750 		goto out;
1751 
1752 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1753 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1754 
1755 	msg->msg_namelen = 0;
1756 
1757 	/* Lock the socket to prevent queue disordering
1758 	 * while sleeps in memcpy_tomsg
1759 	 */
1760 
1761 	if (!siocb->scm) {
1762 		siocb->scm = &tmp_scm;
1763 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1764 	}
1765 
1766 	mutex_lock(&u->readlock);
1767 
1768 	do
1769 	{
1770 		int chunk;
1771 		struct sk_buff *skb;
1772 
1773 		unix_state_lock(sk);
1774 		skb = skb_dequeue(&sk->sk_receive_queue);
1775 		if (skb==NULL)
1776 		{
1777 			if (copied >= target)
1778 				goto unlock;
1779 
1780 			/*
1781 			 *	POSIX 1003.1g mandates this order.
1782 			 */
1783 
1784 			if ((err = sock_error(sk)) != 0)
1785 				goto unlock;
1786 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1787 				goto unlock;
1788 
1789 			unix_state_unlock(sk);
1790 			err = -EAGAIN;
1791 			if (!timeo)
1792 				break;
1793 			mutex_unlock(&u->readlock);
1794 
1795 			timeo = unix_stream_data_wait(sk, timeo);
1796 
1797 			if (signal_pending(current)) {
1798 				err = sock_intr_errno(timeo);
1799 				goto out;
1800 			}
1801 			mutex_lock(&u->readlock);
1802 			continue;
1803  unlock:
1804 			unix_state_unlock(sk);
1805 			break;
1806 		}
1807 		unix_state_unlock(sk);
1808 
1809 		if (check_creds) {
1810 			/* Never glue messages from different writers */
1811 			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1812 				skb_queue_head(&sk->sk_receive_queue, skb);
1813 				break;
1814 			}
1815 		} else {
1816 			/* Copy credentials */
1817 			siocb->scm->creds = *UNIXCREDS(skb);
1818 			check_creds = 1;
1819 		}
1820 
1821 		/* Copy address just once */
1822 		if (sunaddr)
1823 		{
1824 			unix_copy_addr(msg, skb->sk);
1825 			sunaddr = NULL;
1826 		}
1827 
1828 		chunk = min_t(unsigned int, skb->len, size);
1829 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1830 			skb_queue_head(&sk->sk_receive_queue, skb);
1831 			if (copied == 0)
1832 				copied = -EFAULT;
1833 			break;
1834 		}
1835 		copied += chunk;
1836 		size -= chunk;
1837 
1838 		/* Mark read part of skb as used */
1839 		if (!(flags & MSG_PEEK))
1840 		{
1841 			skb_pull(skb, chunk);
1842 
1843 			if (UNIXCB(skb).fp)
1844 				unix_detach_fds(siocb->scm, skb);
1845 
1846 			/* put the skb back if we didn't use it up.. */
1847 			if (skb->len)
1848 			{
1849 				skb_queue_head(&sk->sk_receive_queue, skb);
1850 				break;
1851 			}
1852 
1853 			kfree_skb(skb);
1854 
1855 			if (siocb->scm->fp)
1856 				break;
1857 		}
1858 		else
1859 		{
1860 			/* It is questionable, see note in unix_dgram_recvmsg.
1861 			 */
1862 			if (UNIXCB(skb).fp)
1863 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1864 
1865 			/* put message back and return */
1866 			skb_queue_head(&sk->sk_receive_queue, skb);
1867 			break;
1868 		}
1869 	} while (size);
1870 
1871 	mutex_unlock(&u->readlock);
1872 	scm_recv(sock, msg, siocb->scm, flags);
1873 out:
1874 	return copied ? : err;
1875 }
1876 
1877 static int unix_shutdown(struct socket *sock, int mode)
1878 {
1879 	struct sock *sk = sock->sk;
1880 	struct sock *other;
1881 
1882 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1883 
1884 	if (mode) {
1885 		unix_state_lock(sk);
1886 		sk->sk_shutdown |= mode;
1887 		other=unix_peer(sk);
1888 		if (other)
1889 			sock_hold(other);
1890 		unix_state_unlock(sk);
1891 		sk->sk_state_change(sk);
1892 
1893 		if (other &&
1894 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1895 
1896 			int peer_mode = 0;
1897 
1898 			if (mode&RCV_SHUTDOWN)
1899 				peer_mode |= SEND_SHUTDOWN;
1900 			if (mode&SEND_SHUTDOWN)
1901 				peer_mode |= RCV_SHUTDOWN;
1902 			unix_state_lock(other);
1903 			other->sk_shutdown |= peer_mode;
1904 			unix_state_unlock(other);
1905 			other->sk_state_change(other);
1906 			read_lock(&other->sk_callback_lock);
1907 			if (peer_mode == SHUTDOWN_MASK)
1908 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1909 			else if (peer_mode & RCV_SHUTDOWN)
1910 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1911 			read_unlock(&other->sk_callback_lock);
1912 		}
1913 		if (other)
1914 			sock_put(other);
1915 	}
1916 	return 0;
1917 }
1918 
1919 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1920 {
1921 	struct sock *sk = sock->sk;
1922 	long amount=0;
1923 	int err;
1924 
1925 	switch(cmd)
1926 	{
1927 		case SIOCOUTQ:
1928 			amount = atomic_read(&sk->sk_wmem_alloc);
1929 			err = put_user(amount, (int __user *)arg);
1930 			break;
1931 		case SIOCINQ:
1932 		{
1933 			struct sk_buff *skb;
1934 
1935 			if (sk->sk_state == TCP_LISTEN) {
1936 				err = -EINVAL;
1937 				break;
1938 			}
1939 
1940 			spin_lock(&sk->sk_receive_queue.lock);
1941 			if (sk->sk_type == SOCK_STREAM ||
1942 			    sk->sk_type == SOCK_SEQPACKET) {
1943 				skb_queue_walk(&sk->sk_receive_queue, skb)
1944 					amount += skb->len;
1945 			} else {
1946 				skb = skb_peek(&sk->sk_receive_queue);
1947 				if (skb)
1948 					amount=skb->len;
1949 			}
1950 			spin_unlock(&sk->sk_receive_queue.lock);
1951 			err = put_user(amount, (int __user *)arg);
1952 			break;
1953 		}
1954 
1955 		default:
1956 			err = -ENOIOCTLCMD;
1957 			break;
1958 	}
1959 	return err;
1960 }
1961 
1962 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1963 {
1964 	struct sock *sk = sock->sk;
1965 	unsigned int mask;
1966 
1967 	poll_wait(file, sk->sk_sleep, wait);
1968 	mask = 0;
1969 
1970 	/* exceptional events? */
1971 	if (sk->sk_err)
1972 		mask |= POLLERR;
1973 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1974 		mask |= POLLHUP;
1975 	if (sk->sk_shutdown & RCV_SHUTDOWN)
1976 		mask |= POLLRDHUP;
1977 
1978 	/* readable? */
1979 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1980 	    (sk->sk_shutdown & RCV_SHUTDOWN))
1981 		mask |= POLLIN | POLLRDNORM;
1982 
1983 	/* Connection-based need to check for termination and startup */
1984 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1985 		mask |= POLLHUP;
1986 
1987 	/*
1988 	 * we set writable also when the other side has shut down the
1989 	 * connection. This prevents stuck sockets.
1990 	 */
1991 	if (unix_writable(sk))
1992 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1993 
1994 	return mask;
1995 }
1996 
1997 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
1998 				    poll_table *wait)
1999 {
2000 	struct sock *sk = sock->sk, *other;
2001 	unsigned int mask, writable;
2002 
2003 	poll_wait(file, sk->sk_sleep, wait);
2004 	mask = 0;
2005 
2006 	/* exceptional events? */
2007 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2008 		mask |= POLLERR;
2009 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2010 		mask |= POLLRDHUP;
2011 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2012 		mask |= POLLHUP;
2013 
2014 	/* readable? */
2015 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2016 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2017 		mask |= POLLIN | POLLRDNORM;
2018 
2019 	/* Connection-based need to check for termination and startup */
2020 	if (sk->sk_type == SOCK_SEQPACKET) {
2021 		if (sk->sk_state == TCP_CLOSE)
2022 			mask |= POLLHUP;
2023 		/* connection hasn't started yet? */
2024 		if (sk->sk_state == TCP_SYN_SENT)
2025 			return mask;
2026 	}
2027 
2028 	/* writable? */
2029 	writable = unix_writable(sk);
2030 	if (writable) {
2031 		other = unix_peer_get(sk);
2032 		if (other) {
2033 			if (unix_peer(other) != sk) {
2034 				poll_wait(file, &unix_sk(other)->peer_wait,
2035 					  wait);
2036 				if (unix_recvq_full(other))
2037 					writable = 0;
2038 			}
2039 
2040 			sock_put(other);
2041 		}
2042 	}
2043 
2044 	if (writable)
2045 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2046 	else
2047 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2048 
2049 	return mask;
2050 }
2051 
2052 #ifdef CONFIG_PROC_FS
2053 static struct sock *first_unix_socket(int *i)
2054 {
2055 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2056 		if (!hlist_empty(&unix_socket_table[*i]))
2057 			return __sk_head(&unix_socket_table[*i]);
2058 	}
2059 	return NULL;
2060 }
2061 
2062 static struct sock *next_unix_socket(int *i, struct sock *s)
2063 {
2064 	struct sock *next = sk_next(s);
2065 	/* More in this chain? */
2066 	if (next)
2067 		return next;
2068 	/* Look for next non-empty chain. */
2069 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2070 		if (!hlist_empty(&unix_socket_table[*i]))
2071 			return __sk_head(&unix_socket_table[*i]);
2072 	}
2073 	return NULL;
2074 }
2075 
/*
 * Per-open iteration state of the "unix" seq_file; allocated through
 * seq_open_net() in unix_seq_open() and reached via seq->private.
 */
struct unix_iter_state {
	struct seq_net_private p;	/* presumably must stay first for the seq_*_net helpers - confirm */
	int i;				/* index of the unix_socket_table chain being walked */
};
2080 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2081 {
2082 	struct unix_iter_state *iter = seq->private;
2083 	loff_t off = 0;
2084 	struct sock *s;
2085 
2086 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2087 		if (sock_net(s) != seq_file_net(seq))
2088 			continue;
2089 		if (off == pos)
2090 			return s;
2091 		++off;
2092 	}
2093 	return NULL;
2094 }
2095 
2096 
2097 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2098 	__acquires(unix_table_lock)
2099 {
2100 	spin_lock(&unix_table_lock);
2101 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2102 }
2103 
2104 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2105 {
2106 	struct unix_iter_state *iter = seq->private;
2107 	struct sock *sk = v;
2108 	++*pos;
2109 
2110 	if (v == SEQ_START_TOKEN)
2111 		sk = first_unix_socket(&iter->i);
2112 	else
2113 		sk = next_unix_socket(&iter->i, sk);
2114 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2115 		sk = next_unix_socket(&iter->i, sk);
2116 	return sk;
2117 }
2118 
/* seq_file ->stop: drop the unix_table_lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2124 
2125 static int unix_seq_show(struct seq_file *seq, void *v)
2126 {
2127 
2128 	if (v == SEQ_START_TOKEN)
2129 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2130 			 "Inode Path\n");
2131 	else {
2132 		struct sock *s = v;
2133 		struct unix_sock *u = unix_sk(s);
2134 		unix_state_lock(s);
2135 
2136 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2137 			s,
2138 			atomic_read(&s->sk_refcnt),
2139 			0,
2140 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2141 			s->sk_type,
2142 			s->sk_socket ?
2143 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2144 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2145 			sock_i_ino(s));
2146 
2147 		if (u->addr) {
2148 			int i, len;
2149 			seq_putc(seq, ' ');
2150 
2151 			i = 0;
2152 			len = u->addr->len - sizeof(short);
2153 			if (!UNIX_ABSTRACT(s))
2154 				len--;
2155 			else {
2156 				seq_putc(seq, '@');
2157 				i++;
2158 			}
2159 			for ( ; i < len; i++)
2160 				seq_putc(seq, u->addr->name->sun_path[i]);
2161 		}
2162 		unix_state_unlock(s);
2163 		seq_putc(seq, '\n');
2164 	}
2165 
2166 	return 0;
2167 }
2168 
/* Iterator callbacks handed to seq_open_net() by unix_seq_open(). */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2175 
2176 
/*
 * ->open of the "unix" proc file: set up a per-namespace seq_file that
 * uses unix_seq_ops and has a struct unix_iter_state as private data.
 */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}
2182 
/* File operations registered for the "unix" proc entry in unix_net_init(). */
static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2190 
2191 #endif
2192 
/* PF_UNIX protocol family descriptor; registered in af_unix_init(). */
static struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2198 
2199 
2200 static int unix_net_init(struct net *net)
2201 {
2202 	int error = -ENOMEM;
2203 
2204 	net->unx.sysctl_max_dgram_qlen = 10;
2205 	if (unix_sysctl_register(net))
2206 		goto out;
2207 
2208 #ifdef CONFIG_PROC_FS
2209 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2210 		unix_sysctl_unregister(net);
2211 		goto out;
2212 	}
2213 #endif
2214 	error = 0;
2215 out:
2216 	return 0;
2217 }
2218 
/*
 * Per network namespace teardown: unregister the sysctl table and remove
 * the "unix" proc entry set up by unix_net_init().
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2224 
/* Per-namespace init/exit hooks; registered in af_unix_init(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2229 
2230 static int __init af_unix_init(void)
2231 {
2232 	int rc = -1;
2233 	struct sk_buff *dummy_skb;
2234 
2235 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2236 
2237 	rc = proto_register(&unix_proto, 1);
2238 	if (rc != 0) {
2239 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2240 		       __func__);
2241 		goto out;
2242 	}
2243 
2244 	sock_register(&unix_family_ops);
2245 	register_pernet_subsys(&unix_net_ops);
2246 out:
2247 	return rc;
2248 }
2249 
/*
 * Module unload: unregister the PF_UNIX socket family, the protocol and
 * the per-namespace operations registered by af_unix_init().
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2256 
/*
 * Run af_unix_init() at fs_initcall level:
 *  - earlier than device_initcall() so that other drivers invoking
 *    request_module() don't end up in a loop when modprobe tries
 *    to use a UNIX socket;
 *  - later than subsys_initcall() because we depend on stuff
 *    initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
2266