xref: /linux/net/unix/af_unix.c (revision 43f5b3085fdd27c4edf535d938b2cb0ccead4f75)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan.cox@linux.org>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *		Linus Torvalds	:	Assorted bug cures.
15  *		Niibe Yutaka	:	async I/O support.
16  *		Carsten Paeth	:	PF_UNIX check, address fixes.
17  *		Alan Cox	:	Limit size of allocated blocks.
18  *		Alan Cox	:	Fixed the stupid socketpair bug.
19  *		Alan Cox	:	BSD compatibility fine tuning.
20  *		Alan Cox	:	Fixed a bug in connect when interrupted.
21  *		Alan Cox	:	Sorted out a proper draft version of
22  *					file descriptor passing hacked up from
23  *					Mike Shaver's work.
24  *		Marty Leisner	:	Fixes to fd passing
25  *		Nick Nevin	:	recvmsg bugfix.
26  *		Alan Cox	:	Started proper garbage collector
27  *		Heiko EiBfeldt	:	Missing verify_area check
28  *		Alan Cox	:	Started POSIXisms
29  *		Andreas Schwab	:	Replace inode by dentry for proper
30  *					reference counting
31  *		Kirk Petersen	:	Made this a module
32  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
33  *					Lots of bug fixes.
34  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
35  *					by the above two patches.
36  *	     Andrea Arcangeli	:	If possible we block in connect(2)
37  *					if the max backlog of the listen socket
38  *					has been reached. This won't break
39  *					old apps and it will avoid a huge number
40  *					of socks hashed (this is for unix_gc()
41  *					performance reasons).
42  *					Security fix that limits the max
43  *					number of socks to 2*max_files and
44  *					the number of skb queueable in the
45  *					dgram receiver.
46  *		Artur Skawina   :	Hash function optimizations
47  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
48  *	      Malcolm Beattie   :	Set peercred for socketpair
49  *	     Michal Ostrowski   :       Module initialization cleanup.
50  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
51  *	     				the core infrastructure is doing that
52  *	     				for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *	[TO FIX]
58  *	ECONNREFUSED is not returned from one end of a connected() socket to the
59  *		other the moment one end closes.
60  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *	[NOT TO FIX]
63  *	accept() returns a path name even if the connecting socket has closed
64  *		in the meantime (BSD loses the path and gives up).
65  *	accept() returns 0 length path for an unbound connector. BSD returns 16
66  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *	BSD af_unix apparently has connect forgetting to block properly.
69  *		(need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *	Bug fixes and improvements.
73  *		- client shutdown killed server socket.
74  *		- removed all useless cli/sti pairs.
75  *
76  *	Semantic changes/extensions.
77  *		- generic control message passing.
78  *		- SCM_CREDENTIALS control message.
79  *		- "Abstract" (not FS based) socket bindings.
80  *		  Abstract names are sequences of bytes (not zero terminated)
81  *		  started by 0, so that this name space does not intersect
82  *		  with BSD names.
83  */
84 
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 
120 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
121 static DEFINE_SPINLOCK(unix_table_lock);
122 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
123 
124 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
125 
126 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
127 
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Copy the security ID held in @scm into the slot of @skb designated by
 * UNIXSID().  Note that despite the "get" in its name this helper writes
 * to the skb.
 *
 * Declared inline so that it matches unix_set_secdata() and the empty
 * variants used when CONFIG_SECURITY_NETWORK is off.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the security ID stored in @skb (via UNIXSID()) into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are no-ops. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
145 
146 /*
147  *  SMP locking strategy:
148  *    hash table is protected with spinlock unix_table_lock
149  *    each socket state is protected by separate rwlock.
150  */
151 
152 static inline unsigned unix_hash_fold(__wsum n)
153 {
154 	unsigned hash = (__force unsigned)n;
155 	hash ^= hash>>16;
156 	hash ^= hash>>8;
157 	return hash&(UNIX_HASH_SIZE-1);
158 }
159 
/* Peer pointer of sk's unix_sock; expands to an lvalue, so it can be assigned. */
#define unix_peer(sk) (unix_sk(sk)->peer)
161 
/* Returns nonzero when @osk's peer pointer refers to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
166 
167 static inline int unix_may_send(struct sock *sk, struct sock *osk)
168 {
169 	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
170 }
171 
172 static struct sock *unix_peer_get(struct sock *s)
173 {
174 	struct sock *peer;
175 
176 	unix_state_lock(s);
177 	peer = unix_peer(s);
178 	if (peer)
179 		sock_hold(peer);
180 	unix_state_unlock(s);
181 	return peer;
182 }
183 
184 static inline void unix_release_addr(struct unix_address *addr)
185 {
186 	if (atomic_dec_and_test(&addr->refcnt))
187 		kfree(addr);
188 }
189 
190 /*
191  *	Check unix socket name:
192  *		- should be not zero length.
193  *	        - if started by not zero, should be NULL terminated (FS object)
194  *		- if started by zero, it is abstract name.
195  */
196 
197 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
198 {
199 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
200 		return -EINVAL;
201 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
202 		return -EINVAL;
203 	if (sunaddr->sun_path[0]) {
204 		/*
205 		 * This may look like an off by one error but it is a bit more
206 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
207 		 * sun_path[108] doesnt as such exist.  However in kernel space
208 		 * we are guaranteed that it is a valid memory location in our
209 		 * kernel address buffer.
210 		 */
211 		((char *)sunaddr)[len]=0;
212 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
213 		return len;
214 	}
215 
216 	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
217 	return len;
218 }
219 
/*
 * Take @sk off whatever chain it is on.  No locking is done here;
 * unix_remove_socket() is the variant that takes unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
224 
/*
 * Put @sk on the chain @list.  The socket is expected to be on no chain
 * at this point (checked with BUG_TRAP).  No locking is done here;
 * unix_insert_socket() is the variant that takes unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));

	sk_add_node(sk, list);
}
230 
231 static inline void unix_remove_socket(struct sock *sk)
232 {
233 	spin_lock(&unix_table_lock);
234 	__unix_remove_socket(sk);
235 	spin_unlock(&unix_table_lock);
236 }
237 
238 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
239 {
240 	spin_lock(&unix_table_lock);
241 	__unix_insert_socket(list, sk);
242 	spin_unlock(&unix_table_lock);
243 }
244 
245 static struct sock *__unix_find_socket_byname(struct net *net,
246 					      struct sockaddr_un *sunname,
247 					      int len, int type, unsigned hash)
248 {
249 	struct sock *s;
250 	struct hlist_node *node;
251 
252 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253 		struct unix_sock *u = unix_sk(s);
254 
255 		if (!net_eq(sock_net(s), net))
256 			continue;
257 
258 		if (u->addr->len == len &&
259 		    !memcmp(u->addr->name, sunname, len))
260 			goto found;
261 	}
262 	s = NULL;
263 found:
264 	return s;
265 }
266 
267 static inline struct sock *unix_find_socket_byname(struct net *net,
268 						   struct sockaddr_un *sunname,
269 						   int len, int type,
270 						   unsigned hash)
271 {
272 	struct sock *s;
273 
274 	spin_lock(&unix_table_lock);
275 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
276 	if (s)
277 		sock_hold(s);
278 	spin_unlock(&unix_table_lock);
279 	return s;
280 }
281 
282 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
283 {
284 	struct sock *s;
285 	struct hlist_node *node;
286 
287 	spin_lock(&unix_table_lock);
288 	sk_for_each(s, node,
289 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
290 		struct dentry *dentry = unix_sk(s)->dentry;
291 
292 		if (!net_eq(sock_net(s), net))
293 			continue;
294 
295 		if(dentry && dentry->d_inode == i)
296 		{
297 			sock_hold(s);
298 			goto found;
299 		}
300 	}
301 	s = NULL;
302 found:
303 	spin_unlock(&unix_table_lock);
304 	return s;
305 }
306 
307 static inline int unix_writable(struct sock *sk)
308 {
309 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
310 }
311 
312 static void unix_write_space(struct sock *sk)
313 {
314 	read_lock(&sk->sk_callback_lock);
315 	if (unix_writable(sk)) {
316 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
317 			wake_up_interruptible_sync(sk->sk_sleep);
318 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
319 	}
320 	read_unlock(&sk->sk_callback_lock);
321 }
322 
323 /* When dgram socket disconnects (or changes its peer), we clear its receive
324  * queue of packets arrived from previous peer. First, it allows to do
325  * flow control based only on wmem_alloc; second, sk connected to peer
326  * may receive messages only from that peer. */
327 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
328 {
329 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
330 		skb_queue_purge(&sk->sk_receive_queue);
331 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
332 
333 		/* If one link of bidirectional dgram pipe is disconnected,
334 		 * we signal error. Messages are lost. Do not make this,
335 		 * when peer was not connected to us.
336 		 */
337 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
338 			other->sk_err = ECONNRESET;
339 			other->sk_error_report(other);
340 		}
341 	}
342 }
343 
344 static void unix_sock_destructor(struct sock *sk)
345 {
346 	struct unix_sock *u = unix_sk(sk);
347 
348 	skb_queue_purge(&sk->sk_receive_queue);
349 
350 	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
351 	BUG_TRAP(sk_unhashed(sk));
352 	BUG_TRAP(!sk->sk_socket);
353 	if (!sock_flag(sk, SOCK_DEAD)) {
354 		printk("Attempt to release alive unix socket: %p\n", sk);
355 		return;
356 	}
357 
358 	if (u->addr)
359 		unix_release_addr(u->addr);
360 
361 	atomic_dec(&unix_nr_socks);
362 #ifdef UNIX_REFCNT_DEBUG
363 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
364 #endif
365 }
366 
367 static int unix_release_sock (struct sock *sk, int embrion)
368 {
369 	struct unix_sock *u = unix_sk(sk);
370 	struct dentry *dentry;
371 	struct vfsmount *mnt;
372 	struct sock *skpair;
373 	struct sk_buff *skb;
374 	int state;
375 
376 	unix_remove_socket(sk);
377 
378 	/* Clear state */
379 	unix_state_lock(sk);
380 	sock_orphan(sk);
381 	sk->sk_shutdown = SHUTDOWN_MASK;
382 	dentry	     = u->dentry;
383 	u->dentry    = NULL;
384 	mnt	     = u->mnt;
385 	u->mnt	     = NULL;
386 	state = sk->sk_state;
387 	sk->sk_state = TCP_CLOSE;
388 	unix_state_unlock(sk);
389 
390 	wake_up_interruptible_all(&u->peer_wait);
391 
392 	skpair=unix_peer(sk);
393 
394 	if (skpair!=NULL) {
395 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
396 			unix_state_lock(skpair);
397 			/* No more writes */
398 			skpair->sk_shutdown = SHUTDOWN_MASK;
399 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
400 				skpair->sk_err = ECONNRESET;
401 			unix_state_unlock(skpair);
402 			skpair->sk_state_change(skpair);
403 			read_lock(&skpair->sk_callback_lock);
404 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
405 			read_unlock(&skpair->sk_callback_lock);
406 		}
407 		sock_put(skpair); /* It may now die */
408 		unix_peer(sk) = NULL;
409 	}
410 
411 	/* Try to flush out this socket. Throw out buffers at least */
412 
413 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
414 		if (state==TCP_LISTEN)
415 			unix_release_sock(skb->sk, 1);
416 		/* passed fds are erased in the kfree_skb hook	      */
417 		kfree_skb(skb);
418 	}
419 
420 	if (dentry) {
421 		dput(dentry);
422 		mntput(mnt);
423 	}
424 
425 	sock_put(sk);
426 
427 	/* ---- Socket is dead now and most probably destroyed ---- */
428 
429 	/*
430 	 * Fixme: BSD difference: In BSD all sockets connected to use get
431 	 *	  ECONNRESET and we die on the spot. In Linux we behave
432 	 *	  like files and pipes do and wait for the last
433 	 *	  dereference.
434 	 *
435 	 * Can't we simply set sock->err?
436 	 *
437 	 *	  What the above comment does talk about? --ANK(980817)
438 	 */
439 
440 	if (unix_tot_inflight)
441 		unix_gc();		/* Garbage collect fds */
442 
443 	return 0;
444 }
445 
446 static int unix_listen(struct socket *sock, int backlog)
447 {
448 	int err;
449 	struct sock *sk = sock->sk;
450 	struct unix_sock *u = unix_sk(sk);
451 
452 	err = -EOPNOTSUPP;
453 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
454 		goto out;			/* Only stream/seqpacket sockets accept */
455 	err = -EINVAL;
456 	if (!u->addr)
457 		goto out;			/* No listens on an unbound socket */
458 	unix_state_lock(sk);
459 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
460 		goto out_unlock;
461 	if (backlog > sk->sk_max_ack_backlog)
462 		wake_up_interruptible_all(&u->peer_wait);
463 	sk->sk_max_ack_backlog	= backlog;
464 	sk->sk_state		= TCP_LISTEN;
465 	/* set credentials so connect can copy them */
466 	sk->sk_peercred.pid	= task_tgid_vnr(current);
467 	sk->sk_peercred.uid	= current->euid;
468 	sk->sk_peercred.gid	= current->egid;
469 	err = 0;
470 
471 out_unlock:
472 	unix_state_unlock(sk);
473 out:
474 	return err;
475 }
476 
477 static int unix_release(struct socket *);
478 static int unix_bind(struct socket *, struct sockaddr *, int);
479 static int unix_stream_connect(struct socket *, struct sockaddr *,
480 			       int addr_len, int flags);
481 static int unix_socketpair(struct socket *, struct socket *);
482 static int unix_accept(struct socket *, struct socket *, int);
483 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
484 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
485 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
486 static int unix_shutdown(struct socket *, int);
487 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
488 			       struct msghdr *, size_t);
489 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
490 			       struct msghdr *, size_t, int);
491 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
492 			      struct msghdr *, size_t);
493 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
494 			      struct msghdr *, size_t, int);
495 static int unix_dgram_connect(struct socket *, struct sockaddr *,
496 			      int, int);
497 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
498 				  struct msghdr *, size_t);
499 
500 static const struct proto_ops unix_stream_ops = {
501 	.family =	PF_UNIX,
502 	.owner =	THIS_MODULE,
503 	.release =	unix_release,
504 	.bind =		unix_bind,
505 	.connect =	unix_stream_connect,
506 	.socketpair =	unix_socketpair,
507 	.accept =	unix_accept,
508 	.getname =	unix_getname,
509 	.poll =		unix_poll,
510 	.ioctl =	unix_ioctl,
511 	.listen =	unix_listen,
512 	.shutdown =	unix_shutdown,
513 	.setsockopt =	sock_no_setsockopt,
514 	.getsockopt =	sock_no_getsockopt,
515 	.sendmsg =	unix_stream_sendmsg,
516 	.recvmsg =	unix_stream_recvmsg,
517 	.mmap =		sock_no_mmap,
518 	.sendpage =	sock_no_sendpage,
519 };
520 
521 static const struct proto_ops unix_dgram_ops = {
522 	.family =	PF_UNIX,
523 	.owner =	THIS_MODULE,
524 	.release =	unix_release,
525 	.bind =		unix_bind,
526 	.connect =	unix_dgram_connect,
527 	.socketpair =	unix_socketpair,
528 	.accept =	sock_no_accept,
529 	.getname =	unix_getname,
530 	.poll =		datagram_poll,
531 	.ioctl =	unix_ioctl,
532 	.listen =	sock_no_listen,
533 	.shutdown =	unix_shutdown,
534 	.setsockopt =	sock_no_setsockopt,
535 	.getsockopt =	sock_no_getsockopt,
536 	.sendmsg =	unix_dgram_sendmsg,
537 	.recvmsg =	unix_dgram_recvmsg,
538 	.mmap =		sock_no_mmap,
539 	.sendpage =	sock_no_sendpage,
540 };
541 
542 static const struct proto_ops unix_seqpacket_ops = {
543 	.family =	PF_UNIX,
544 	.owner =	THIS_MODULE,
545 	.release =	unix_release,
546 	.bind =		unix_bind,
547 	.connect =	unix_stream_connect,
548 	.socketpair =	unix_socketpair,
549 	.accept =	unix_accept,
550 	.getname =	unix_getname,
551 	.poll =		datagram_poll,
552 	.ioctl =	unix_ioctl,
553 	.listen =	unix_listen,
554 	.shutdown =	unix_shutdown,
555 	.setsockopt =	sock_no_setsockopt,
556 	.getsockopt =	sock_no_getsockopt,
557 	.sendmsg =	unix_seqpacket_sendmsg,
558 	.recvmsg =	unix_dgram_recvmsg,
559 	.mmap =		sock_no_mmap,
560 	.sendpage =	sock_no_sendpage,
561 };
562 
563 static struct proto unix_proto = {
564 	.name	  = "UNIX",
565 	.owner	  = THIS_MODULE,
566 	.obj_size = sizeof(struct unix_sock),
567 };
568 
569 /*
570  * AF_UNIX sockets do not interact with hardware, hence they
571  * dont trigger interrupts - so it's safe for them to have
572  * bh-unsafe locking for their sk_receive_queue.lock. Split off
573  * this special lock-class by reinitializing the spinlock key:
574  */
575 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
576 
577 static struct sock * unix_create1(struct net *net, struct socket *sock)
578 {
579 	struct sock *sk = NULL;
580 	struct unix_sock *u;
581 
582 	atomic_inc(&unix_nr_socks);
583 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
584 		goto out;
585 
586 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
587 	if (!sk)
588 		goto out;
589 
590 	sock_init_data(sock,sk);
591 	lockdep_set_class(&sk->sk_receive_queue.lock,
592 				&af_unix_sk_receive_queue_lock_key);
593 
594 	sk->sk_write_space	= unix_write_space;
595 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
596 	sk->sk_destruct		= unix_sock_destructor;
597 	u	  = unix_sk(sk);
598 	u->dentry = NULL;
599 	u->mnt	  = NULL;
600 	spin_lock_init(&u->lock);
601 	atomic_set(&u->inflight, 0);
602 	INIT_LIST_HEAD(&u->link);
603 	mutex_init(&u->readlock); /* single task reading lock */
604 	init_waitqueue_head(&u->peer_wait);
605 	unix_insert_socket(unix_sockets_unbound, sk);
606 out:
607 	if (sk == NULL)
608 		atomic_dec(&unix_nr_socks);
609 	return sk;
610 }
611 
612 static int unix_create(struct net *net, struct socket *sock, int protocol)
613 {
614 	if (protocol && protocol != PF_UNIX)
615 		return -EPROTONOSUPPORT;
616 
617 	sock->state = SS_UNCONNECTED;
618 
619 	switch (sock->type) {
620 	case SOCK_STREAM:
621 		sock->ops = &unix_stream_ops;
622 		break;
623 		/*
624 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
625 		 *	nothing uses it.
626 		 */
627 	case SOCK_RAW:
628 		sock->type=SOCK_DGRAM;
629 	case SOCK_DGRAM:
630 		sock->ops = &unix_dgram_ops;
631 		break;
632 	case SOCK_SEQPACKET:
633 		sock->ops = &unix_seqpacket_ops;
634 		break;
635 	default:
636 		return -ESOCKTNOSUPPORT;
637 	}
638 
639 	return unix_create1(net, sock) ? 0 : -ENOMEM;
640 }
641 
642 static int unix_release(struct socket *sock)
643 {
644 	struct sock *sk = sock->sk;
645 
646 	if (!sk)
647 		return 0;
648 
649 	sock->sk = NULL;
650 
651 	return unix_release_sock (sk, 0);
652 }
653 
654 static int unix_autobind(struct socket *sock)
655 {
656 	struct sock *sk = sock->sk;
657 	struct net *net = sock_net(sk);
658 	struct unix_sock *u = unix_sk(sk);
659 	static u32 ordernum = 1;
660 	struct unix_address * addr;
661 	int err;
662 
663 	mutex_lock(&u->readlock);
664 
665 	err = 0;
666 	if (u->addr)
667 		goto out;
668 
669 	err = -ENOMEM;
670 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
671 	if (!addr)
672 		goto out;
673 
674 	addr->name->sun_family = AF_UNIX;
675 	atomic_set(&addr->refcnt, 1);
676 
677 retry:
678 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
679 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
680 
681 	spin_lock(&unix_table_lock);
682 	ordernum = (ordernum+1)&0xFFFFF;
683 
684 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
685 				      addr->hash)) {
686 		spin_unlock(&unix_table_lock);
687 		/* Sanity yield. It is unusual case, but yet... */
688 		if (!(ordernum&0xFF))
689 			yield();
690 		goto retry;
691 	}
692 	addr->hash ^= sk->sk_type;
693 
694 	__unix_remove_socket(sk);
695 	u->addr = addr;
696 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
697 	spin_unlock(&unix_table_lock);
698 	err = 0;
699 
700 out:	mutex_unlock(&u->readlock);
701 	return err;
702 }
703 
/*
 * Locate the socket that the address @sunname refers to.
 *
 * Filesystem names (first path byte non-zero) are resolved through the
 * VFS: the caller needs write permission on the path, the inode must be
 * a socket, and a sock of the requested @type must be bound to it.
 * Abstract names are looked up in the socket table using @len and @hash.
 *
 * On success a sock with a reference held is returned and *error is left
 * untouched.  On failure NULL is returned and *error holds the reason:
 * an error from path lookup or the permission check, -ECONNREFUSED when
 * nothing is bound there, or -EPROTOTYPE on a socket type mismatch.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *peer;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		/* Abstract name */
		peer = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!peer) {
			err = -ECONNREFUSED;
			goto fail;
		}

		dentry = unix_sk(peer)->dentry;
		if (dentry)
			touch_atime(unix_sk(peer)->mnt, dentry);
		return peer;
	}

	/* Filesystem name */
	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;
	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
		goto put_fail;
	peer = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
	if (!peer)
		goto put_fail;

	/* atime is only touched when the type matches */
	if (peer->sk_type == type)
		touch_atime(nd.path.mnt, nd.path.dentry);

	path_put(&nd.path);

	if (peer->sk_type != type) {
		sock_put(peer);
		err = -EPROTOTYPE;
		goto fail;
	}
	return peer;

put_fail:
	path_put(&nd.path);
fail:
	*error = err;
	return NULL;
}
756 
757 
758 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
759 {
760 	struct sock *sk = sock->sk;
761 	struct net *net = sock_net(sk);
762 	struct unix_sock *u = unix_sk(sk);
763 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
764 	struct dentry * dentry = NULL;
765 	struct nameidata nd;
766 	int err;
767 	unsigned hash;
768 	struct unix_address *addr;
769 	struct hlist_head *list;
770 
771 	err = -EINVAL;
772 	if (sunaddr->sun_family != AF_UNIX)
773 		goto out;
774 
775 	if (addr_len==sizeof(short)) {
776 		err = unix_autobind(sock);
777 		goto out;
778 	}
779 
780 	err = unix_mkname(sunaddr, addr_len, &hash);
781 	if (err < 0)
782 		goto out;
783 	addr_len = err;
784 
785 	mutex_lock(&u->readlock);
786 
787 	err = -EINVAL;
788 	if (u->addr)
789 		goto out_up;
790 
791 	err = -ENOMEM;
792 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
793 	if (!addr)
794 		goto out_up;
795 
796 	memcpy(addr->name, sunaddr, addr_len);
797 	addr->len = addr_len;
798 	addr->hash = hash ^ sk->sk_type;
799 	atomic_set(&addr->refcnt, 1);
800 
801 	if (sunaddr->sun_path[0]) {
802 		unsigned int mode;
803 		err = 0;
804 		/*
805 		 * Get the parent directory, calculate the hash for last
806 		 * component.
807 		 */
808 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
809 		if (err)
810 			goto out_mknod_parent;
811 
812 		dentry = lookup_create(&nd, 0);
813 		err = PTR_ERR(dentry);
814 		if (IS_ERR(dentry))
815 			goto out_mknod_unlock;
816 
817 		/*
818 		 * All right, let's create it.
819 		 */
820 		mode = S_IFSOCK |
821 		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
822 		err = mnt_want_write(nd.path.mnt);
823 		if (err)
824 			goto out_mknod_dput;
825 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
826 		mnt_drop_write(nd.path.mnt);
827 		if (err)
828 			goto out_mknod_dput;
829 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
830 		dput(nd.path.dentry);
831 		nd.path.dentry = dentry;
832 
833 		addr->hash = UNIX_HASH_SIZE;
834 	}
835 
836 	spin_lock(&unix_table_lock);
837 
838 	if (!sunaddr->sun_path[0]) {
839 		err = -EADDRINUSE;
840 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
841 					      sk->sk_type, hash)) {
842 			unix_release_addr(addr);
843 			goto out_unlock;
844 		}
845 
846 		list = &unix_socket_table[addr->hash];
847 	} else {
848 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
849 		u->dentry = nd.path.dentry;
850 		u->mnt    = nd.path.mnt;
851 	}
852 
853 	err = 0;
854 	__unix_remove_socket(sk);
855 	u->addr = addr;
856 	__unix_insert_socket(list, sk);
857 
858 out_unlock:
859 	spin_unlock(&unix_table_lock);
860 out_up:
861 	mutex_unlock(&u->readlock);
862 out:
863 	return err;
864 
865 out_mknod_dput:
866 	dput(dentry);
867 out_mknod_unlock:
868 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
869 	path_put(&nd.path);
870 out_mknod_parent:
871 	if (err==-EEXIST)
872 		err=-EADDRINUSE;
873 	unix_release_addr(addr);
874 	goto out_up;
875 }
876 
/*
 * Take the state locks of two socks.  When @sk2 is NULL or the same sock
 * as @sk1 only @sk1 is locked.  Otherwise the sock at the lower address
 * is locked first and the other one with the nested variant, which gives
 * a fixed locking order regardless of argument order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first;
	struct sock *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
891 
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and also
 * @sk2 when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
901 
902 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
903 			      int alen, int flags)
904 {
905 	struct sock *sk = sock->sk;
906 	struct net *net = sock_net(sk);
907 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
908 	struct sock *other;
909 	unsigned hash;
910 	int err;
911 
912 	if (addr->sa_family != AF_UNSPEC) {
913 		err = unix_mkname(sunaddr, alen, &hash);
914 		if (err < 0)
915 			goto out;
916 		alen = err;
917 
918 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
919 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
920 			goto out;
921 
922 restart:
923 		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
924 		if (!other)
925 			goto out;
926 
927 		unix_state_double_lock(sk, other);
928 
929 		/* Apparently VFS overslept socket death. Retry. */
930 		if (sock_flag(other, SOCK_DEAD)) {
931 			unix_state_double_unlock(sk, other);
932 			sock_put(other);
933 			goto restart;
934 		}
935 
936 		err = -EPERM;
937 		if (!unix_may_send(sk, other))
938 			goto out_unlock;
939 
940 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
941 		if (err)
942 			goto out_unlock;
943 
944 	} else {
945 		/*
946 		 *	1003.1g breaking connected state with AF_UNSPEC
947 		 */
948 		other = NULL;
949 		unix_state_double_lock(sk, other);
950 	}
951 
952 	/*
953 	 * If it was connected, reconnect.
954 	 */
955 	if (unix_peer(sk)) {
956 		struct sock *old_peer = unix_peer(sk);
957 		unix_peer(sk)=other;
958 		unix_state_double_unlock(sk, other);
959 
960 		if (other != old_peer)
961 			unix_dgram_disconnected(sk, old_peer);
962 		sock_put(old_peer);
963 	} else {
964 		unix_peer(sk)=other;
965 		unix_state_double_unlock(sk, other);
966 	}
967 	return 0;
968 
969 out_unlock:
970 	unix_state_double_unlock(sk, other);
971 	sock_put(other);
972 out:
973 	return err;
974 }
975 
976 static long unix_wait_for_peer(struct sock *other, long timeo)
977 {
978 	struct unix_sock *u = unix_sk(other);
979 	int sched;
980 	DEFINE_WAIT(wait);
981 
982 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
983 
984 	sched = !sock_flag(other, SOCK_DEAD) &&
985 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
986 		(skb_queue_len(&other->sk_receive_queue) >
987 		 other->sk_max_ack_backlog);
988 
989 	unix_state_unlock(other);
990 
991 	if (sched)
992 		timeo = schedule_timeout(timeo);
993 
994 	finish_wait(&u->peer_wait, &wait);
995 	return timeo;
996 }
997 
998 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
999 			       int addr_len, int flags)
1000 {
1001 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1002 	struct sock *sk = sock->sk;
1003 	struct net *net = sock_net(sk);
1004 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1005 	struct sock *newsk = NULL;
1006 	struct sock *other = NULL;
1007 	struct sk_buff *skb = NULL;
1008 	unsigned hash;
1009 	int st;
1010 	int err;
1011 	long timeo;
1012 
1013 	err = unix_mkname(sunaddr, addr_len, &hash);
1014 	if (err < 0)
1015 		goto out;
1016 	addr_len = err;
1017 
1018 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1019 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1020 		goto out;
1021 
1022 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1023 
1024 	/* First of all allocate resources.
1025 	   If we will make it after state is locked,
1026 	   we will have to recheck all again in any case.
1027 	 */
1028 
1029 	err = -ENOMEM;
1030 
1031 	/* create new sock for complete connection */
1032 	newsk = unix_create1(sock_net(sk), NULL);
1033 	if (newsk == NULL)
1034 		goto out;
1035 
1036 	/* Allocate skb for sending to listening sock */
1037 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1038 	if (skb == NULL)
1039 		goto out;
1040 
1041 restart:
1042 	/*  Find listening sock. */
1043 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1044 	if (!other)
1045 		goto out;
1046 
1047 	/* Latch state of peer */
1048 	unix_state_lock(other);
1049 
1050 	/* Apparently VFS overslept socket death. Retry. */
1051 	if (sock_flag(other, SOCK_DEAD)) {
1052 		unix_state_unlock(other);
1053 		sock_put(other);
1054 		goto restart;
1055 	}
1056 
1057 	err = -ECONNREFUSED;
1058 	if (other->sk_state != TCP_LISTEN)
1059 		goto out_unlock;
1060 
1061 	if (skb_queue_len(&other->sk_receive_queue) >
1062 	    other->sk_max_ack_backlog) {
1063 		err = -EAGAIN;
1064 		if (!timeo)
1065 			goto out_unlock;
1066 
1067 		timeo = unix_wait_for_peer(other, timeo);
1068 
1069 		err = sock_intr_errno(timeo);
1070 		if (signal_pending(current))
1071 			goto out;
1072 		sock_put(other);
1073 		goto restart;
1074 	}
1075 
1076 	/* Latch our state.
1077 
1078 	   It is tricky place. We need to grab write lock and cannot
1079 	   drop lock on peer. It is dangerous because deadlock is
1080 	   possible. Connect to self case and simultaneous
1081 	   attempt to connect are eliminated by checking socket
1082 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1083 	   check this before attempt to grab lock.
1084 
1085 	   Well, and we have to recheck the state after socket locked.
1086 	 */
1087 	st = sk->sk_state;
1088 
1089 	switch (st) {
1090 	case TCP_CLOSE:
1091 		/* This is ok... continue with connect */
1092 		break;
1093 	case TCP_ESTABLISHED:
1094 		/* Socket is already connected */
1095 		err = -EISCONN;
1096 		goto out_unlock;
1097 	default:
1098 		err = -EINVAL;
1099 		goto out_unlock;
1100 	}
1101 
1102 	unix_state_lock_nested(sk);
1103 
1104 	if (sk->sk_state != st) {
1105 		unix_state_unlock(sk);
1106 		unix_state_unlock(other);
1107 		sock_put(other);
1108 		goto restart;
1109 	}
1110 
1111 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1112 	if (err) {
1113 		unix_state_unlock(sk);
1114 		goto out_unlock;
1115 	}
1116 
1117 	/* The way is open! Fastly set all the necessary fields... */
1118 
1119 	sock_hold(sk);
1120 	unix_peer(newsk)	= sk;
1121 	newsk->sk_state		= TCP_ESTABLISHED;
1122 	newsk->sk_type		= sk->sk_type;
1123 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1124 	newsk->sk_peercred.uid	= current->euid;
1125 	newsk->sk_peercred.gid	= current->egid;
1126 	newu = unix_sk(newsk);
1127 	newsk->sk_sleep		= &newu->peer_wait;
1128 	otheru = unix_sk(other);
1129 
1130 	/* copy address information from listening to new sock*/
1131 	if (otheru->addr) {
1132 		atomic_inc(&otheru->addr->refcnt);
1133 		newu->addr = otheru->addr;
1134 	}
1135 	if (otheru->dentry) {
1136 		newu->dentry	= dget(otheru->dentry);
1137 		newu->mnt	= mntget(otheru->mnt);
1138 	}
1139 
1140 	/* Set credentials */
1141 	sk->sk_peercred = other->sk_peercred;
1142 
1143 	sock->state	= SS_CONNECTED;
1144 	sk->sk_state	= TCP_ESTABLISHED;
1145 	sock_hold(newsk);
1146 
1147 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1148 	unix_peer(sk)	= newsk;
1149 
1150 	unix_state_unlock(sk);
1151 
1152 	/* take ten and and send info to listening sock */
1153 	spin_lock(&other->sk_receive_queue.lock);
1154 	__skb_queue_tail(&other->sk_receive_queue, skb);
1155 	spin_unlock(&other->sk_receive_queue.lock);
1156 	unix_state_unlock(other);
1157 	other->sk_data_ready(other, 0);
1158 	sock_put(other);
1159 	return 0;
1160 
1161 out_unlock:
1162 	if (other)
1163 		unix_state_unlock(other);
1164 
1165 out:
1166 	if (skb)
1167 		kfree_skb(skb);
1168 	if (newsk)
1169 		unix_release_sock(newsk, 0);
1170 	if (other)
1171 		sock_put(other);
1172 	return err;
1173 }
1174 
1175 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1176 {
1177 	struct sock *ska=socka->sk, *skb = sockb->sk;
1178 
1179 	/* Join our sockets back to back */
1180 	sock_hold(ska);
1181 	sock_hold(skb);
1182 	unix_peer(ska)=skb;
1183 	unix_peer(skb)=ska;
1184 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1185 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1186 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1187 
1188 	if (ska->sk_type != SOCK_DGRAM) {
1189 		ska->sk_state = TCP_ESTABLISHED;
1190 		skb->sk_state = TCP_ESTABLISHED;
1191 		socka->state  = SS_CONNECTED;
1192 		sockb->state  = SS_CONNECTED;
1193 	}
1194 	return 0;
1195 }
1196 
1197 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1198 {
1199 	struct sock *sk = sock->sk;
1200 	struct sock *tsk;
1201 	struct sk_buff *skb;
1202 	int err;
1203 
1204 	err = -EOPNOTSUPP;
1205 	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1206 		goto out;
1207 
1208 	err = -EINVAL;
1209 	if (sk->sk_state != TCP_LISTEN)
1210 		goto out;
1211 
1212 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1213 	 * so that no locks are necessary.
1214 	 */
1215 
1216 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1217 	if (!skb) {
1218 		/* This means receive shutdown. */
1219 		if (err == 0)
1220 			err = -EINVAL;
1221 		goto out;
1222 	}
1223 
1224 	tsk = skb->sk;
1225 	skb_free_datagram(sk, skb);
1226 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1227 
1228 	/* attach accepted sock to socket */
1229 	unix_state_lock(tsk);
1230 	newsock->state = SS_CONNECTED;
1231 	sock_graft(tsk, newsock);
1232 	unix_state_unlock(tsk);
1233 	return 0;
1234 
1235 out:
1236 	return err;
1237 }
1238 
1239 
1240 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1241 {
1242 	struct sock *sk = sock->sk;
1243 	struct unix_sock *u;
1244 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1245 	int err = 0;
1246 
1247 	if (peer) {
1248 		sk = unix_peer_get(sk);
1249 
1250 		err = -ENOTCONN;
1251 		if (!sk)
1252 			goto out;
1253 		err = 0;
1254 	} else {
1255 		sock_hold(sk);
1256 	}
1257 
1258 	u = unix_sk(sk);
1259 	unix_state_lock(sk);
1260 	if (!u->addr) {
1261 		sunaddr->sun_family = AF_UNIX;
1262 		sunaddr->sun_path[0] = 0;
1263 		*uaddr_len = sizeof(short);
1264 	} else {
1265 		struct unix_address *addr = u->addr;
1266 
1267 		*uaddr_len = addr->len;
1268 		memcpy(sunaddr, addr->name, *uaddr_len);
1269 	}
1270 	unix_state_unlock(sk);
1271 	sock_put(sk);
1272 out:
1273 	return err;
1274 }
1275 
1276 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1277 {
1278 	int i;
1279 
1280 	scm->fp = UNIXCB(skb).fp;
1281 	skb->destructor = sock_wfree;
1282 	UNIXCB(skb).fp = NULL;
1283 
1284 	for (i=scm->fp->count-1; i>=0; i--)
1285 		unix_notinflight(scm->fp->fp[i]);
1286 }
1287 
1288 static void unix_destruct_fds(struct sk_buff *skb)
1289 {
1290 	struct scm_cookie scm;
1291 	memset(&scm, 0, sizeof(scm));
1292 	unix_detach_fds(&scm, skb);
1293 
1294 	/* Alas, it calls VFS */
1295 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1296 	scm_destroy(&scm);
1297 	sock_wfree(skb);
1298 }
1299 
1300 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1301 {
1302 	int i;
1303 	for (i=scm->fp->count-1; i>=0; i--)
1304 		unix_inflight(scm->fp->fp[i]);
1305 	UNIXCB(skb).fp = scm->fp;
1306 	skb->destructor = unix_destruct_fds;
1307 	scm->fp = NULL;
1308 }
1309 
1310 /*
1311  *	Send AF_UNIX data.
1312  */
1313 
/*
 * unix_dgram_sendmsg - queue one message on the receiver's receive queue.
 *
 * The receiver is either the address in msg->msg_name (looked up with
 * unix_find_other()) or, when no name is supplied, the connected peer.
 * unix_seqpacket_sendmsg() also funnels into this function after
 * clearing msg_namelen.
 *
 * Returns @len on success or a negative errno.  Once scm_send() has
 * succeeded, every exit path runs scm_destroy() on the cookie.
 */
1314 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1315 			      struct msghdr *msg, size_t len)
1316 {
1317 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1318 	struct sock *sk = sock->sk;
1319 	struct net *net = sock_net(sk);
1320 	struct unix_sock *u = unix_sk(sk);
1321 	struct sockaddr_un *sunaddr=msg->msg_name;
1322 	struct sock *other = NULL;
1323 	int namelen = 0; /* fake GCC */
1324 	int err;
1325 	unsigned hash;
1326 	struct sk_buff *skb;
1327 	long timeo;
1328 	struct scm_cookie tmp_scm;
1329 
	/* Use an on-stack cookie when the caller did not provide one. */
1330 	if (NULL == siocb->scm)
1331 		siocb->scm = &tmp_scm;
1332 	err = scm_send(sock, msg, siocb->scm);
1333 	if (err < 0)
1334 		return err;
1335 
1336 	err = -EOPNOTSUPP;
1337 	if (msg->msg_flags&MSG_OOB)
1338 		goto out;
1339 
	/*
	 * Explicit destination: validate the address now, the lookup itself
	 * happens at "restart".  No destination: fall back to the connected
	 * peer, on which unix_peer_get() gives us a reference.
	 */
1340 	if (msg->msg_namelen) {
1341 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1342 		if (err < 0)
1343 			goto out;
1344 		namelen = err;
1345 	} else {
1346 		sunaddr = NULL;
1347 		err = -ENOTCONN;
1348 		other = unix_peer_get(sk);
1349 		if (!other)
1350 			goto out;
1351 	}
1352 
	/* With SOCK_PASSCRED set, an unbound sender is autobound first. */
1353 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1354 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1355 		goto out;
1356 
	/* Reject anything larger than the send buffer minus 32 bytes. */
1357 	err = -EMSGSIZE;
1358 	if (len > sk->sk_sndbuf - 32)
1359 		goto out;
1360 
1361 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1362 	if (skb==NULL)
1363 		goto out;
1364 
	/* Stamp sender credentials, passed files and security data on the skb. */
1365 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1366 	if (siocb->scm->fp)
1367 		unix_attach_fds(siocb->scm, skb);
1368 	unix_get_secdata(siocb->scm, skb);
1369 
1370 	skb_reset_transport_header(skb);
1371 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1372 	if (err)
1373 		goto out_free;
1374 
1375 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1376 
	/*
	 * Entered with "other" either NULL (needs a lookup) or referenced
	 * but not locked.
	 */
1377 restart:
1378 	if (!other) {
		/* No address to look up: we were using a peer that is gone. */
1379 		err = -ECONNRESET;
1380 		if (sunaddr == NULL)
1381 			goto out_free;
1382 
1383 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1384 					hash, &err);
1385 		if (other==NULL)
1386 			goto out_free;
1387 	}
1388 
1389 	unix_state_lock(other);
1390 	err = -EPERM;
1391 	if (!unix_may_send(sk, other))
1392 		goto out_unlock;
1393 
1394 	if (sock_flag(other, SOCK_DEAD)) {
1395 		/*
1396 		 *	Check with 1003.1g - what should
1397 		 *	datagram error
1398 		 */
		/* Drop the reference taken by the lookup / unix_peer_get(). */
1399 		unix_state_unlock(other);
1400 		sock_put(other);
1401 
		/*
		 * If the dead socket was our connected peer, disconnect from
		 * it (second sock_put() below) and fail with ECONNREFUSED.
		 * Otherwise err stays 0 and the lookup is retried.
		 */
1402 		err = 0;
1403 		unix_state_lock(sk);
1404 		if (unix_peer(sk) == other) {
1405 			unix_peer(sk)=NULL;
1406 			unix_state_unlock(sk);
1407 
1408 			unix_dgram_disconnected(sk, other);
1409 			sock_put(other);
1410 			err = -ECONNREFUSED;
1411 		} else {
1412 			unix_state_unlock(sk);
1413 		}
1414 
1415 		other = NULL;
1416 		if (err)
1417 			goto out_free;
1418 		goto restart;
1419 	}
1420 
1421 	err = -EPIPE;
1422 	if (other->sk_shutdown & RCV_SHUTDOWN)
1423 		goto out_unlock;
1424 
	/* The may_send security hook is skipped for SOCK_SEQPACKET senders. */
1425 	if (sk->sk_type != SOCK_SEQPACKET) {
1426 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1427 		if (err)
1428 			goto out_unlock;
1429 	}
1430 
	/*
	 * Receiver is not connected back to us and its queue is over its
	 * backlog limit: fail with EAGAIN when non-blocking, else wait.
	 */
1431 	if (unix_peer(other) != sk &&
1432 	    (skb_queue_len(&other->sk_receive_queue) >
1433 	     other->sk_max_ack_backlog)) {
1434 		if (!timeo) {
1435 			err = -EAGAIN;
1436 			goto out_unlock;
1437 		}
1438 
		/*
		 * NOTE(review): the paths below (out_free / restart) treat
		 * "other" as unlocked but still referenced after this call -
		 * confirm against unix_wait_for_peer().
		 */
1439 		timeo = unix_wait_for_peer(other, timeo);
1440 
1441 		err = sock_intr_errno(timeo);
1442 		if (signal_pending(current))
1443 			goto out_free;
1444 
1445 		goto restart;
1446 	}
1447 
	/* Deliver: queue the skb, then notify the receiver outside its lock. */
1448 	skb_queue_tail(&other->sk_receive_queue, skb);
1449 	unix_state_unlock(other);
1450 	other->sk_data_ready(other, len);
1451 	sock_put(other);
1452 	scm_destroy(siocb->scm);
1453 	return len;
1454 
	/* Error unwinding: unlock receiver -> free skb -> drop ref and cookie. */
1455 out_unlock:
1456 	unix_state_unlock(other);
1457 out_free:
1458 	kfree_skb(skb);
1459 out:
1460 	if (other)
1461 		sock_put(other);
1462 	scm_destroy(siocb->scm);
1463 	return err;
1464 }
1465 
1466 
/*
 * unix_stream_sendmsg - send a byte stream to the connected peer.
 *
 * The payload is cut into skbs (see the size clamps in the loop) and each
 * one is appended to the peer's receive queue.  A destination address is
 * never accepted: -EISCONN if the socket is established, -EOPNOTSUPP
 * otherwise.
 *
 * Returns the number of bytes queued if that is non-zero (even when a
 * later chunk failed), otherwise a negative errno.  SIGPIPE is raised on
 * a broken pipe only if nothing was sent and MSG_NOSIGNAL is not set.
 */
1467 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1468 			       struct msghdr *msg, size_t len)
1469 {
1470 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1471 	struct sock *sk = sock->sk;
1472 	struct sock *other = NULL;
1473 	struct sockaddr_un *sunaddr=msg->msg_name;
1474 	int err,size;
1475 	struct sk_buff *skb;
1476 	int sent=0;
1477 	struct scm_cookie tmp_scm;
1478 
	/* Use an on-stack cookie when the caller did not provide one. */
1479 	if (NULL == siocb->scm)
1480 		siocb->scm = &tmp_scm;
1481 	err = scm_send(sock, msg, siocb->scm);
1482 	if (err < 0)
1483 		return err;
1484 
1485 	err = -EOPNOTSUPP;
1486 	if (msg->msg_flags&MSG_OOB)
1487 		goto out_err;
1488 
1489 	if (msg->msg_namelen) {
1490 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1491 		goto out_err;
1492 	} else {
1493 		sunaddr = NULL;
1494 		err = -ENOTCONN;
		/*
		 * Plain unix_peer(): unlike the datagram path, no reference
		 * is taken on the peer here and none is dropped on exit.
		 */
1495 		other = unix_peer(sk);
1496 		if (!other)
1497 			goto out_err;
1498 	}
1499 
1500 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1501 		goto pipe_err;
1502 
1503 	while(sent < len)
1504 	{
1505 		/*
1506 		 *	Optimisation for the fact that under 0.01% of X
1507 		 *	messages typically need breaking up.
1508 		 */
1509 
1510 		size = len-sent;
1511 
1512 		/* Keep two messages in the pipe so it schedules better */
1513 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1514 			size = (sk->sk_sndbuf >> 1) - 64;
1515 
1516 		if (size > SKB_MAX_ALLOC)
1517 			size = SKB_MAX_ALLOC;
1518 
1519 		/*
1520 		 *	Grab a buffer
1521 		 */
1522 
1523 		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1524 
1525 		if (skb==NULL)
1526 			goto out_err;
1527 
1528 		/*
1529 		 *	If you pass two values to the sock_alloc_send_skb
1530 		 *	it tries to grab the large buffer with GFP_NOFS
1531 		 *	(which can fail easily), and if it fails grab the
1532 		 *	fallback size buffer which is under a page and will
1533 		 *	succeed. [Alan]
1534 		 */
1535 		size = min_t(int, size, skb_tailroom(skb));
1536 
		/*
		 * Credentials go on every chunk.  Passed files go on the first
		 * chunk only: unix_attach_fds() clears scm->fp, so this test
		 * is false on later iterations.
		 */
1537 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1538 		if (siocb->scm->fp)
1539 			unix_attach_fds(siocb->scm, skb);
1540 
1541 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1542 			kfree_skb(skb);
1543 			goto out_err;
1544 		}
1545 
1546 		unix_state_lock(other);
1547 
		/* Peer dead or no longer receiving: broken pipe. */
1548 		if (sock_flag(other, SOCK_DEAD) ||
1549 		    (other->sk_shutdown & RCV_SHUTDOWN))
1550 			goto pipe_err_free;
1551 
		/* Queue the chunk, then notify the peer outside its lock. */
1552 		skb_queue_tail(&other->sk_receive_queue, skb);
1553 		unix_state_unlock(other);
1554 		other->sk_data_ready(other, size);
1555 		sent+=size;
1556 	}
1557 
1558 	scm_destroy(siocb->scm);
1559 	siocb->scm = NULL;
1560 
1561 	return sent;
1562 
	/* pipe_err_free is entered with "other" locked and skb not yet queued. */
1563 pipe_err_free:
1564 	unix_state_unlock(other);
1565 	kfree_skb(skb);
1566 pipe_err:
1567 	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1568 		send_sig(SIGPIPE,current,0);
1569 	err = -EPIPE;
1570 out_err:
1571 	scm_destroy(siocb->scm);
1572 	siocb->scm = NULL;
	/* A partial send reports the byte count instead of the error. */
1573 	return sent ? : err;
1574 }
1575 
1576 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1577 				  struct msghdr *msg, size_t len)
1578 {
1579 	int err;
1580 	struct sock *sk = sock->sk;
1581 
1582 	err = sock_error(sk);
1583 	if (err)
1584 		return err;
1585 
1586 	if (sk->sk_state != TCP_ESTABLISHED)
1587 		return -ENOTCONN;
1588 
1589 	if (msg->msg_namelen)
1590 		msg->msg_namelen = 0;
1591 
1592 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1593 }
1594 
1595 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1596 {
1597 	struct unix_sock *u = unix_sk(sk);
1598 
1599 	msg->msg_namelen = 0;
1600 	if (u->addr) {
1601 		msg->msg_namelen = u->addr->len;
1602 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1603 	}
1604 }
1605 
1606 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1607 			      struct msghdr *msg, size_t size,
1608 			      int flags)
1609 {
1610 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1611 	struct scm_cookie tmp_scm;
1612 	struct sock *sk = sock->sk;
1613 	struct unix_sock *u = unix_sk(sk);
1614 	int noblock = flags & MSG_DONTWAIT;
1615 	struct sk_buff *skb;
1616 	int err;
1617 
1618 	err = -EOPNOTSUPP;
1619 	if (flags&MSG_OOB)
1620 		goto out;
1621 
1622 	msg->msg_namelen = 0;
1623 
1624 	mutex_lock(&u->readlock);
1625 
1626 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1627 	if (!skb) {
1628 		unix_state_lock(sk);
1629 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1630 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1631 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1632 			err = 0;
1633 		unix_state_unlock(sk);
1634 		goto out_unlock;
1635 	}
1636 
1637 	wake_up_interruptible_sync(&u->peer_wait);
1638 
1639 	if (msg->msg_name)
1640 		unix_copy_addr(msg, skb->sk);
1641 
1642 	if (size > skb->len)
1643 		size = skb->len;
1644 	else if (size < skb->len)
1645 		msg->msg_flags |= MSG_TRUNC;
1646 
1647 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1648 	if (err)
1649 		goto out_free;
1650 
1651 	if (!siocb->scm) {
1652 		siocb->scm = &tmp_scm;
1653 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1654 	}
1655 	siocb->scm->creds = *UNIXCREDS(skb);
1656 	unix_set_secdata(siocb->scm, skb);
1657 
1658 	if (!(flags & MSG_PEEK))
1659 	{
1660 		if (UNIXCB(skb).fp)
1661 			unix_detach_fds(siocb->scm, skb);
1662 	}
1663 	else
1664 	{
1665 		/* It is questionable: on PEEK we could:
1666 		   - do not return fds - good, but too simple 8)
1667 		   - return fds, and do not return them on read (old strategy,
1668 		     apparently wrong)
1669 		   - clone fds (I chose it for now, it is the most universal
1670 		     solution)
1671 
1672 		   POSIX 1003.1g does not actually define this clearly
1673 		   at all. POSIX 1003.1g doesn't define a lot of things
1674 		   clearly however!
1675 
1676 		*/
1677 		if (UNIXCB(skb).fp)
1678 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1679 	}
1680 	err = size;
1681 
1682 	scm_recv(sock, msg, siocb->scm, flags);
1683 
1684 out_free:
1685 	skb_free_datagram(sk,skb);
1686 out_unlock:
1687 	mutex_unlock(&u->readlock);
1688 out:
1689 	return err;
1690 }
1691 
1692 /*
1693  *	Sleep until data has arrive. But check for races..
1694  */
1695 
1696 static long unix_stream_data_wait(struct sock * sk, long timeo)
1697 {
1698 	DEFINE_WAIT(wait);
1699 
1700 	unix_state_lock(sk);
1701 
1702 	for (;;) {
1703 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1704 
1705 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1706 		    sk->sk_err ||
1707 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1708 		    signal_pending(current) ||
1709 		    !timeo)
1710 			break;
1711 
1712 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1713 		unix_state_unlock(sk);
1714 		timeo = schedule_timeout(timeo);
1715 		unix_state_lock(sk);
1716 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1717 	}
1718 
1719 	finish_wait(sk->sk_sleep, &wait);
1720 	unix_state_unlock(sk);
1721 	return timeo;
1722 }
1723 
1724 
1725 
/*
 * unix_stream_recvmsg - read bytes from a connected stream socket.
 *
 * skbs are taken off the receive queue one at a time under u->readlock
 * and copied to the caller until @size bytes were delivered or one of
 * the "break" conditions in the loop is hit.  A partially consumed skb is
 * put back at the head of the queue.
 *
 * Returns the number of bytes copied if that is non-zero, otherwise err
 * (a negative errno, or 0 e.g. when the receive side is shut down and the
 * queue is empty).  -EINVAL unless the socket is TCP_ESTABLISHED,
 * -EOPNOTSUPP for MSG_OOB.
 */
1726 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1727 			       struct msghdr *msg, size_t size,
1728 			       int flags)
1729 {
1730 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1731 	struct scm_cookie tmp_scm;
1732 	struct sock *sk = sock->sk;
1733 	struct unix_sock *u = unix_sk(sk);
	/* Doubles as a "source address still to be copied" flag, see below. */
1734 	struct sockaddr_un *sunaddr=msg->msg_name;
1735 	int copied = 0;
	/* Becomes 1 once the first skb's credentials were recorded. */
1736 	int check_creds = 0;
1737 	int target;
1738 	int err = 0;
1739 	long timeo;
1740 
1741 	err = -EINVAL;
1742 	if (sk->sk_state != TCP_ESTABLISHED)
1743 		goto out;
1744 
1745 	err = -EOPNOTSUPP;
1746 	if (flags&MSG_OOB)
1747 		goto out;
1748 
	/* target: once this many bytes were copied an empty queue ends the read. */
1749 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1750 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1751 
1752 	msg->msg_namelen = 0;
1753 
1754 	/* Lock the socket to prevent queue disordering
1755 	 * while sleeps in memcpy_tomsg
1756 	 */
1757 
	/* Fall back to an on-stack, zeroed cookie if none was provided. */
1758 	if (!siocb->scm) {
1759 		siocb->scm = &tmp_scm;
1760 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1761 	}
1762 
1763 	mutex_lock(&u->readlock);
1764 
1765 	do
1766 	{
1767 		int chunk;
1768 		struct sk_buff *skb;
1769 
1770 		unix_state_lock(sk);
1771 		skb = skb_dequeue(&sk->sk_receive_queue);
1772 		if (skb==NULL)
1773 		{
			/* Queue empty: finished if enough was copied already. */
1774 			if (copied >= target)
1775 				goto unlock;
1776 
1777 			/*
1778 			 *	POSIX 1003.1g mandates this order.
1779 			 */
1780 
1781 			if ((err = sock_error(sk)) != 0)
1782 				goto unlock;
1783 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1784 				goto unlock;
1785 
1786 			unix_state_unlock(sk);
1787 			err = -EAGAIN;
1788 			if (!timeo)
1789 				break;
			/* Drop readlock while sleeping; it is retaken before looping. */
1790 			mutex_unlock(&u->readlock);
1791 
1792 			timeo = unix_stream_data_wait(sk, timeo);
1793 
			/*
			 * readlock is not held here, so jump straight to "out",
			 * which also bypasses scm_recv().
			 */
1794 			if (signal_pending(current)) {
1795 				err = sock_intr_errno(timeo);
1796 				goto out;
1797 			}
1798 			mutex_lock(&u->readlock);
1799 			continue;
			/* Reached only via goto, with the state lock still held. */
1800  unlock:
1801 			unix_state_unlock(sk);
1802 			break;
1803 		}
1804 		unix_state_unlock(sk);
1805 
1806 		if (check_creds) {
1807 			/* Never glue messages from different writers */
1808 			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1809 				skb_queue_head(&sk->sk_receive_queue, skb);
1810 				break;
1811 			}
1812 		} else {
1813 			/* Copy credentials */
1814 			siocb->scm->creds = *UNIXCREDS(skb);
1815 			check_creds = 1;
1816 		}
1817 
1818 		/* Copy address just once */
1819 		if (sunaddr)
1820 		{
1821 			unix_copy_addr(msg, skb->sk);
1822 			sunaddr = NULL;
1823 		}
1824 
1825 		chunk = min_t(unsigned int, skb->len, size);
		/* Copy failed: requeue the skb; report -EFAULT only if nothing was copied yet. */
1826 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1827 			skb_queue_head(&sk->sk_receive_queue, skb);
1828 			if (copied == 0)
1829 				copied = -EFAULT;
1830 			break;
1831 		}
1832 		copied += chunk;
1833 		size -= chunk;
1834 
1835 		/* Mark read part of skb as used */
1836 		if (!(flags & MSG_PEEK))
1837 		{
1838 			skb_pull(skb, chunk);
1839 
1840 			if (UNIXCB(skb).fp)
1841 				unix_detach_fds(siocb->scm, skb);
1842 
1843 			/* put the skb back if we didn't use it up.. */
1844 			if (skb->len)
1845 			{
1846 				skb_queue_head(&sk->sk_receive_queue, skb);
1847 				break;
1848 			}
1849 
1850 			kfree_skb(skb);
1851 
			/* Stop after an skb that carried passed files. */
1852 			if (siocb->scm->fp)
1853 				break;
1854 		}
1855 		else
1856 		{
1857 			/* It is questionable, see note in unix_dgram_recvmsg.
1858 			 */
1859 			if (UNIXCB(skb).fp)
1860 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1861 
			/* A peek therefore never looks beyond the first skb. */
1862 			/* put message back and return */
1863 			skb_queue_head(&sk->sk_receive_queue, skb);
1864 			break;
1865 		}
1866 	} while (size);
1867 
1868 	mutex_unlock(&u->readlock);
1869 	scm_recv(sock, msg, siocb->scm, flags);
1870 out:
	/* Bytes copied (or -EFAULT stored in copied) win over err. */
1871 	return copied ? : err;
1872 }
1873 
1874 static int unix_shutdown(struct socket *sock, int mode)
1875 {
1876 	struct sock *sk = sock->sk;
1877 	struct sock *other;
1878 
1879 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1880 
1881 	if (mode) {
1882 		unix_state_lock(sk);
1883 		sk->sk_shutdown |= mode;
1884 		other=unix_peer(sk);
1885 		if (other)
1886 			sock_hold(other);
1887 		unix_state_unlock(sk);
1888 		sk->sk_state_change(sk);
1889 
1890 		if (other &&
1891 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1892 
1893 			int peer_mode = 0;
1894 
1895 			if (mode&RCV_SHUTDOWN)
1896 				peer_mode |= SEND_SHUTDOWN;
1897 			if (mode&SEND_SHUTDOWN)
1898 				peer_mode |= RCV_SHUTDOWN;
1899 			unix_state_lock(other);
1900 			other->sk_shutdown |= peer_mode;
1901 			unix_state_unlock(other);
1902 			other->sk_state_change(other);
1903 			read_lock(&other->sk_callback_lock);
1904 			if (peer_mode == SHUTDOWN_MASK)
1905 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1906 			else if (peer_mode & RCV_SHUTDOWN)
1907 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1908 			read_unlock(&other->sk_callback_lock);
1909 		}
1910 		if (other)
1911 			sock_put(other);
1912 	}
1913 	return 0;
1914 }
1915 
1916 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1917 {
1918 	struct sock *sk = sock->sk;
1919 	long amount=0;
1920 	int err;
1921 
1922 	switch(cmd)
1923 	{
1924 		case SIOCOUTQ:
1925 			amount = atomic_read(&sk->sk_wmem_alloc);
1926 			err = put_user(amount, (int __user *)arg);
1927 			break;
1928 		case SIOCINQ:
1929 		{
1930 			struct sk_buff *skb;
1931 
1932 			if (sk->sk_state == TCP_LISTEN) {
1933 				err = -EINVAL;
1934 				break;
1935 			}
1936 
1937 			spin_lock(&sk->sk_receive_queue.lock);
1938 			if (sk->sk_type == SOCK_STREAM ||
1939 			    sk->sk_type == SOCK_SEQPACKET) {
1940 				skb_queue_walk(&sk->sk_receive_queue, skb)
1941 					amount += skb->len;
1942 			} else {
1943 				skb = skb_peek(&sk->sk_receive_queue);
1944 				if (skb)
1945 					amount=skb->len;
1946 			}
1947 			spin_unlock(&sk->sk_receive_queue.lock);
1948 			err = put_user(amount, (int __user *)arg);
1949 			break;
1950 		}
1951 
1952 		default:
1953 			err = -ENOIOCTLCMD;
1954 			break;
1955 	}
1956 	return err;
1957 }
1958 
1959 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1960 {
1961 	struct sock *sk = sock->sk;
1962 	unsigned int mask;
1963 
1964 	poll_wait(file, sk->sk_sleep, wait);
1965 	mask = 0;
1966 
1967 	/* exceptional events? */
1968 	if (sk->sk_err)
1969 		mask |= POLLERR;
1970 	if (sk->sk_shutdown == SHUTDOWN_MASK)
1971 		mask |= POLLHUP;
1972 	if (sk->sk_shutdown & RCV_SHUTDOWN)
1973 		mask |= POLLRDHUP;
1974 
1975 	/* readable? */
1976 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
1977 	    (sk->sk_shutdown & RCV_SHUTDOWN))
1978 		mask |= POLLIN | POLLRDNORM;
1979 
1980 	/* Connection-based need to check for termination and startup */
1981 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1982 		mask |= POLLHUP;
1983 
1984 	/*
1985 	 * we set writable also when the other side has shut down the
1986 	 * connection. This prevents stuck sockets.
1987 	 */
1988 	if (unix_writable(sk))
1989 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1990 
1991 	return mask;
1992 }
1993 
1994 
1995 #ifdef CONFIG_PROC_FS
1996 static struct sock *first_unix_socket(int *i)
1997 {
1998 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
1999 		if (!hlist_empty(&unix_socket_table[*i]))
2000 			return __sk_head(&unix_socket_table[*i]);
2001 	}
2002 	return NULL;
2003 }
2004 
2005 static struct sock *next_unix_socket(int *i, struct sock *s)
2006 {
2007 	struct sock *next = sk_next(s);
2008 	/* More in this chain? */
2009 	if (next)
2010 		return next;
2011 	/* Look for next non-empty chain. */
2012 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2013 		if (!hlist_empty(&unix_socket_table[*i]))
2014 			return __sk_head(&unix_socket_table[*i]);
2015 	}
2016 	return NULL;
2017 }
2018 
/*
 * Per-open iterator state for the "unix" proc file (allocated through
 * seq_open_net() in unix_seq_open()).
 */
2019 struct unix_iter_state {
	/* NOTE(review): presumably has to stay the first member so that
	 * seq_file_net() can find it - confirm against seq_open_net(). */
2020 	struct seq_net_private p;
	/* Hash bucket cursor maintained by first/next_unix_socket(). */
2021 	int i;
2022 };
2023 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2024 {
2025 	struct unix_iter_state *iter = seq->private;
2026 	loff_t off = 0;
2027 	struct sock *s;
2028 
2029 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2030 		if (sock_net(s) != seq_file_net(seq))
2031 			continue;
2032 		if (off == pos)
2033 			return s;
2034 		++off;
2035 	}
2036 	return NULL;
2037 }
2038 
2039 
2040 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2041 	__acquires(unix_table_lock)
2042 {
2043 	spin_lock(&unix_table_lock);
2044 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2045 }
2046 
2047 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2048 {
2049 	struct unix_iter_state *iter = seq->private;
2050 	struct sock *sk = v;
2051 	++*pos;
2052 
2053 	if (v == SEQ_START_TOKEN)
2054 		sk = first_unix_socket(&iter->i);
2055 	else
2056 		sk = next_unix_socket(&iter->i, sk);
2057 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2058 		sk = next_unix_socket(&iter->i, sk);
2059 	return sk;
2060 }
2061 
/* seq_file ->stop: release the table lock taken in unix_seq_start(). */
2062 static void unix_seq_stop(struct seq_file *seq, void *v)
2063 	__releases(unix_table_lock)
2064 {
2065 	spin_unlock(&unix_table_lock);
2066 }
2067 
2068 static int unix_seq_show(struct seq_file *seq, void *v)
2069 {
2070 
2071 	if (v == SEQ_START_TOKEN)
2072 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2073 			 "Inode Path\n");
2074 	else {
2075 		struct sock *s = v;
2076 		struct unix_sock *u = unix_sk(s);
2077 		unix_state_lock(s);
2078 
2079 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2080 			s,
2081 			atomic_read(&s->sk_refcnt),
2082 			0,
2083 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2084 			s->sk_type,
2085 			s->sk_socket ?
2086 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2087 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2088 			sock_i_ino(s));
2089 
2090 		if (u->addr) {
2091 			int i, len;
2092 			seq_putc(seq, ' ');
2093 
2094 			i = 0;
2095 			len = u->addr->len - sizeof(short);
2096 			if (!UNIX_ABSTRACT(s))
2097 				len--;
2098 			else {
2099 				seq_putc(seq, '@');
2100 				i++;
2101 			}
2102 			for ( ; i < len; i++)
2103 				seq_putc(seq, u->addr->name->sun_path[i]);
2104 		}
2105 		unix_state_unlock(s);
2106 		seq_putc(seq, '\n');
2107 	}
2108 
2109 	return 0;
2110 }
2111 
/* Iterator callbacks behind the "unix" proc file (see unix_seq_open()). */
2112 static const struct seq_operations unix_seq_ops = {
2113 	.start  = unix_seq_start,
2114 	.next   = unix_seq_next,
2115 	.stop   = unix_seq_stop,
2116 	.show   = unix_seq_show,
2117 };
2118 
2119 
/*
 * ->open for the "unix" proc file: set up a seq_file driven by
 * unix_seq_ops with a struct unix_iter_state as private data.
 */
2120 static int unix_seq_open(struct inode *inode, struct file *file)
2121 {
2122 	return seq_open_net(inode, file, &unix_seq_ops,
2123 			    sizeof(struct unix_iter_state));
2124 }
2125 
/*
 * File operations registered for the "unix" proc entry in
 * unix_net_init(); everything except ->open is a generic seq_file helper.
 */
2126 static const struct file_operations unix_seq_fops = {
2127 	.owner		= THIS_MODULE,
2128 	.open		= unix_seq_open,
2129 	.read		= seq_read,
2130 	.llseek		= seq_lseek,
2131 	.release	= seq_release_net,
2132 };
2133 
2134 #endif
2135 
/* PF_UNIX protocol family, handed to sock_register() in af_unix_init(). */
2136 static struct net_proto_family unix_family_ops = {
2137 	.family = PF_UNIX,
2138 	.create = unix_create,
2139 	.owner	= THIS_MODULE,
2140 };
2141 
2142 
2143 static int unix_net_init(struct net *net)
2144 {
2145 	int error = -ENOMEM;
2146 
2147 	net->unx.sysctl_max_dgram_qlen = 10;
2148 	if (unix_sysctl_register(net))
2149 		goto out;
2150 
2151 #ifdef CONFIG_PROC_FS
2152 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2153 		unix_sysctl_unregister(net);
2154 		goto out;
2155 	}
2156 #endif
2157 	error = 0;
2158 out:
2159 	return 0;
2160 }
2161 
/*
 * Per-network-namespace teardown (pernet ->exit): drop the sysctl table
 * and the "unix" proc entry set up by unix_net_init().
 */
2162 static void unix_net_exit(struct net *net)
2163 {
2164 	unix_sysctl_unregister(net);
2165 	proc_net_remove(net, "unix");
2166 }
2167 
/* Namespace hooks, handed to register_pernet_subsys() in af_unix_init(). */
2168 static struct pernet_operations unix_net_ops = {
2169 	.init = unix_net_init,
2170 	.exit = unix_net_exit,
2171 };
2172 
2173 static int __init af_unix_init(void)
2174 {
2175 	int rc = -1;
2176 	struct sk_buff *dummy_skb;
2177 
2178 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2179 
2180 	rc = proto_register(&unix_proto, 1);
2181 	if (rc != 0) {
2182 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2183 		       __func__);
2184 		goto out;
2185 	}
2186 
2187 	sock_register(&unix_family_ops);
2188 	register_pernet_subsys(&unix_net_ops);
2189 out:
2190 	return rc;
2191 }
2192 
/*
 * Module unload: unregister the PF_UNIX family, the unix_proto protocol
 * and the per-namespace operations registered by af_unix_init().
 */
2193 static void __exit af_unix_exit(void)
2194 {
2195 	sock_unregister(PF_UNIX);
2196 	proto_unregister(&unix_proto);
2197 	unregister_pernet_subsys(&unix_net_ops);
2198 }
2199 
2200 /* Earlier than device_initcall() so that other drivers invoking
2201    request_module() don't end up in a loop when modprobe tries
2202    to use a UNIX socket. But later than subsys_initcall() because
2203    we depend on stuff initialised there */
/* Hook af_unix_init() in at fs_initcall level and af_unix_exit() at unload. */
2204 fs_initcall(af_unix_init);
2205 module_exit(af_unix_exit);
2206 
/*
 * Module metadata.  NOTE(review): the alias presumably lets the module be
 * loaded on demand for PF_UNIX - confirm against MODULE_ALIAS_NETPROTO.
 */
2207 MODULE_LICENSE("GPL");
2208 MODULE_ALIAS_NETPROTO(PF_UNIX);
2209