xref: /linux/net/unix/af_unix.c (revision b32d133aec5dc882cf783a293f393bfb3f4379e1)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko EiBfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetosv	:	Repaired (I hope) bugs introduces
33  *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					is been reached. This won't break
37  *					old apps and it will avoid huge amount
38  *					of socks hashed (this for unix_gc()
39  *					performances reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 static DEFINE_SPINLOCK(unix_table_lock);
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121 
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
126 #ifdef CONFIG_SECURITY_NETWORK
127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
128 {
129 	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
130 }
131 
132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
133 {
134 	scm->secid = *UNIXSID(skb);
135 }
136 #else
137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
138 { }
139 
140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 { }
142 #endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate rwlock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- should be not zero length.
196  *	        - if started by not zero, should be NULL terminated (FS object)
197  *		- if started by zero, it is abstract name.
198  */
199 
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off by one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesnt as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len] = 0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220 	return len;
221 }
222 
223 static void __unix_remove_socket(struct sock *sk)
224 {
225 	sk_del_node_init(sk);
226 }
227 
228 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
229 {
230 	WARN_ON(!sk_unhashed(sk));
231 	sk_add_node(sk, list);
232 }
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (!net_eq(sock_net(s), net))
296 			continue;
297 
298 		if (dentry && dentry->d_inode == i) {
299 			sock_hold(s);
300 			goto found;
301 		}
302 	}
303 	s = NULL;
304 found:
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
309 static inline int unix_writable(struct sock *sk)
310 {
311 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
312 }
313 
314 static void unix_write_space(struct sock *sk)
315 {
316 	read_lock(&sk->sk_callback_lock);
317 	if (unix_writable(sk)) {
318 		if (sk_has_sleeper(sk))
319 			wake_up_interruptible_sync(sk->sk_sleep);
320 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321 	}
322 	read_unlock(&sk->sk_callback_lock);
323 }
324 
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326  * queue of packets arrived from previous peer. First, it allows to do
327  * flow control based only on wmem_alloc; second, sk connected to peer
328  * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
332 		skb_queue_purge(&sk->sk_receive_queue);
333 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
334 
335 		/* If one link of bidirectional dgram pipe is disconnected,
336 		 * we signal error. Messages are lost. Do not make this,
337 		 * when peer was not connected to us.
338 		 */
339 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
340 			other->sk_err = ECONNRESET;
341 			other->sk_error_report(other);
342 		}
343 	}
344 }
345 
346 static void unix_sock_destructor(struct sock *sk)
347 {
348 	struct unix_sock *u = unix_sk(sk);
349 
350 	skb_queue_purge(&sk->sk_receive_queue);
351 
352 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353 	WARN_ON(!sk_unhashed(sk));
354 	WARN_ON(sk->sk_socket);
355 	if (!sock_flag(sk, SOCK_DEAD)) {
356 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357 		return;
358 	}
359 
360 	if (u->addr)
361 		unix_release_addr(u->addr);
362 
363 	atomic_dec(&unix_nr_socks);
364 	local_bh_disable();
365 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 	local_bh_enable();
367 #ifdef UNIX_REFCNT_DEBUG
368 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369 		atomic_read(&unix_nr_socks));
370 #endif
371 }
372 
373 static int unix_release_sock(struct sock *sk, int embrion)
374 {
375 	struct unix_sock *u = unix_sk(sk);
376 	struct dentry *dentry;
377 	struct vfsmount *mnt;
378 	struct sock *skpair;
379 	struct sk_buff *skb;
380 	int state;
381 
382 	unix_remove_socket(sk);
383 
384 	/* Clear state */
385 	unix_state_lock(sk);
386 	sock_orphan(sk);
387 	sk->sk_shutdown = SHUTDOWN_MASK;
388 	dentry	     = u->dentry;
389 	u->dentry    = NULL;
390 	mnt	     = u->mnt;
391 	u->mnt	     = NULL;
392 	state = sk->sk_state;
393 	sk->sk_state = TCP_CLOSE;
394 	unix_state_unlock(sk);
395 
396 	wake_up_interruptible_all(&u->peer_wait);
397 
398 	skpair = unix_peer(sk);
399 
400 	if (skpair != NULL) {
401 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
402 			unix_state_lock(skpair);
403 			/* No more writes */
404 			skpair->sk_shutdown = SHUTDOWN_MASK;
405 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406 				skpair->sk_err = ECONNRESET;
407 			unix_state_unlock(skpair);
408 			skpair->sk_state_change(skpair);
409 			read_lock(&skpair->sk_callback_lock);
410 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411 			read_unlock(&skpair->sk_callback_lock);
412 		}
413 		sock_put(skpair); /* It may now die */
414 		unix_peer(sk) = NULL;
415 	}
416 
417 	/* Try to flush out this socket. Throw out buffers at least */
418 
419 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
420 		if (state == TCP_LISTEN)
421 			unix_release_sock(skb->sk, 1);
422 		/* passed fds are erased in the kfree_skb hook	      */
423 		kfree_skb(skb);
424 	}
425 
426 	if (dentry) {
427 		dput(dentry);
428 		mntput(mnt);
429 	}
430 
431 	sock_put(sk);
432 
433 	/* ---- Socket is dead now and most probably destroyed ---- */
434 
435 	/*
436 	 * Fixme: BSD difference: In BSD all sockets connected to use get
437 	 *	  ECONNRESET and we die on the spot. In Linux we behave
438 	 *	  like files and pipes do and wait for the last
439 	 *	  dereference.
440 	 *
441 	 * Can't we simply set sock->err?
442 	 *
443 	 *	  What the above comment does talk about? --ANK(980817)
444 	 */
445 
446 	if (unix_tot_inflight)
447 		unix_gc();		/* Garbage collect fds */
448 
449 	return 0;
450 }
451 
452 static int unix_listen(struct socket *sock, int backlog)
453 {
454 	int err;
455 	struct sock *sk = sock->sk;
456 	struct unix_sock *u = unix_sk(sk);
457 
458 	err = -EOPNOTSUPP;
459 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460 		goto out;	/* Only stream/seqpacket sockets accept */
461 	err = -EINVAL;
462 	if (!u->addr)
463 		goto out;	/* No listens on an unbound socket */
464 	unix_state_lock(sk);
465 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466 		goto out_unlock;
467 	if (backlog > sk->sk_max_ack_backlog)
468 		wake_up_interruptible_all(&u->peer_wait);
469 	sk->sk_max_ack_backlog	= backlog;
470 	sk->sk_state		= TCP_LISTEN;
471 	/* set credentials so connect can copy them */
472 	sk->sk_peercred.pid	= task_tgid_vnr(current);
473 	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474 	err = 0;
475 
476 out_unlock:
477 	unix_state_unlock(sk);
478 out:
479 	return err;
480 }
481 
482 static int unix_release(struct socket *);
483 static int unix_bind(struct socket *, struct sockaddr *, int);
484 static int unix_stream_connect(struct socket *, struct sockaddr *,
485 			       int addr_len, int flags);
486 static int unix_socketpair(struct socket *, struct socket *);
487 static int unix_accept(struct socket *, struct socket *, int);
488 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490 static unsigned int unix_dgram_poll(struct file *, struct socket *,
491 				    poll_table *);
492 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493 static int unix_shutdown(struct socket *, int);
494 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495 			       struct msghdr *, size_t);
496 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497 			       struct msghdr *, size_t, int);
498 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499 			      struct msghdr *, size_t);
500 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501 			      struct msghdr *, size_t, int);
502 static int unix_dgram_connect(struct socket *, struct sockaddr *,
503 			      int, int);
504 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505 				  struct msghdr *, size_t);
506 
507 static const struct proto_ops unix_stream_ops = {
508 	.family =	PF_UNIX,
509 	.owner =	THIS_MODULE,
510 	.release =	unix_release,
511 	.bind =		unix_bind,
512 	.connect =	unix_stream_connect,
513 	.socketpair =	unix_socketpair,
514 	.accept =	unix_accept,
515 	.getname =	unix_getname,
516 	.poll =		unix_poll,
517 	.ioctl =	unix_ioctl,
518 	.listen =	unix_listen,
519 	.shutdown =	unix_shutdown,
520 	.setsockopt =	sock_no_setsockopt,
521 	.getsockopt =	sock_no_getsockopt,
522 	.sendmsg =	unix_stream_sendmsg,
523 	.recvmsg =	unix_stream_recvmsg,
524 	.mmap =		sock_no_mmap,
525 	.sendpage =	sock_no_sendpage,
526 };
527 
528 static const struct proto_ops unix_dgram_ops = {
529 	.family =	PF_UNIX,
530 	.owner =	THIS_MODULE,
531 	.release =	unix_release,
532 	.bind =		unix_bind,
533 	.connect =	unix_dgram_connect,
534 	.socketpair =	unix_socketpair,
535 	.accept =	sock_no_accept,
536 	.getname =	unix_getname,
537 	.poll =		unix_dgram_poll,
538 	.ioctl =	unix_ioctl,
539 	.listen =	sock_no_listen,
540 	.shutdown =	unix_shutdown,
541 	.setsockopt =	sock_no_setsockopt,
542 	.getsockopt =	sock_no_getsockopt,
543 	.sendmsg =	unix_dgram_sendmsg,
544 	.recvmsg =	unix_dgram_recvmsg,
545 	.mmap =		sock_no_mmap,
546 	.sendpage =	sock_no_sendpage,
547 };
548 
549 static const struct proto_ops unix_seqpacket_ops = {
550 	.family =	PF_UNIX,
551 	.owner =	THIS_MODULE,
552 	.release =	unix_release,
553 	.bind =		unix_bind,
554 	.connect =	unix_stream_connect,
555 	.socketpair =	unix_socketpair,
556 	.accept =	unix_accept,
557 	.getname =	unix_getname,
558 	.poll =		unix_dgram_poll,
559 	.ioctl =	unix_ioctl,
560 	.listen =	unix_listen,
561 	.shutdown =	unix_shutdown,
562 	.setsockopt =	sock_no_setsockopt,
563 	.getsockopt =	sock_no_getsockopt,
564 	.sendmsg =	unix_seqpacket_sendmsg,
565 	.recvmsg =	unix_dgram_recvmsg,
566 	.mmap =		sock_no_mmap,
567 	.sendpage =	sock_no_sendpage,
568 };
569 
570 static struct proto unix_proto = {
571 	.name			= "UNIX",
572 	.owner			= THIS_MODULE,
573 	.obj_size		= sizeof(struct unix_sock),
574 };
575 
576 /*
577  * AF_UNIX sockets do not interact with hardware, hence they
578  * dont trigger interrupts - so it's safe for them to have
579  * bh-unsafe locking for their sk_receive_queue.lock. Split off
580  * this special lock-class by reinitializing the spinlock key:
581  */
582 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583 
584 static struct sock *unix_create1(struct net *net, struct socket *sock)
585 {
586 	struct sock *sk = NULL;
587 	struct unix_sock *u;
588 
589 	atomic_inc(&unix_nr_socks);
590 	if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591 		goto out;
592 
593 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594 	if (!sk)
595 		goto out;
596 
597 	sock_init_data(sock, sk);
598 	lockdep_set_class(&sk->sk_receive_queue.lock,
599 				&af_unix_sk_receive_queue_lock_key);
600 
601 	sk->sk_write_space	= unix_write_space;
602 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
603 	sk->sk_destruct		= unix_sock_destructor;
604 	u	  = unix_sk(sk);
605 	u->dentry = NULL;
606 	u->mnt	  = NULL;
607 	spin_lock_init(&u->lock);
608 	atomic_long_set(&u->inflight, 0);
609 	INIT_LIST_HEAD(&u->link);
610 	mutex_init(&u->readlock); /* single task reading lock */
611 	init_waitqueue_head(&u->peer_wait);
612 	unix_insert_socket(unix_sockets_unbound, sk);
613 out:
614 	if (sk == NULL)
615 		atomic_dec(&unix_nr_socks);
616 	else {
617 		local_bh_disable();
618 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619 		local_bh_enable();
620 	}
621 	return sk;
622 }
623 
624 static int unix_create(struct net *net, struct socket *sock, int protocol)
625 {
626 	if (protocol && protocol != PF_UNIX)
627 		return -EPROTONOSUPPORT;
628 
629 	sock->state = SS_UNCONNECTED;
630 
631 	switch (sock->type) {
632 	case SOCK_STREAM:
633 		sock->ops = &unix_stream_ops;
634 		break;
635 		/*
636 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
637 		 *	nothing uses it.
638 		 */
639 	case SOCK_RAW:
640 		sock->type = SOCK_DGRAM;
641 	case SOCK_DGRAM:
642 		sock->ops = &unix_dgram_ops;
643 		break;
644 	case SOCK_SEQPACKET:
645 		sock->ops = &unix_seqpacket_ops;
646 		break;
647 	default:
648 		return -ESOCKTNOSUPPORT;
649 	}
650 
651 	return unix_create1(net, sock) ? 0 : -ENOMEM;
652 }
653 
654 static int unix_release(struct socket *sock)
655 {
656 	struct sock *sk = sock->sk;
657 
658 	if (!sk)
659 		return 0;
660 
661 	sock->sk = NULL;
662 
663 	return unix_release_sock(sk, 0);
664 }
665 
666 static int unix_autobind(struct socket *sock)
667 {
668 	struct sock *sk = sock->sk;
669 	struct net *net = sock_net(sk);
670 	struct unix_sock *u = unix_sk(sk);
671 	static u32 ordernum = 1;
672 	struct unix_address *addr;
673 	int err;
674 
675 	mutex_lock(&u->readlock);
676 
677 	err = 0;
678 	if (u->addr)
679 		goto out;
680 
681 	err = -ENOMEM;
682 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683 	if (!addr)
684 		goto out;
685 
686 	addr->name->sun_family = AF_UNIX;
687 	atomic_set(&addr->refcnt, 1);
688 
689 retry:
690 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692 
693 	spin_lock(&unix_table_lock);
694 	ordernum = (ordernum+1)&0xFFFFF;
695 
696 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697 				      addr->hash)) {
698 		spin_unlock(&unix_table_lock);
699 		/* Sanity yield. It is unusual case, but yet... */
700 		if (!(ordernum&0xFF))
701 			yield();
702 		goto retry;
703 	}
704 	addr->hash ^= sk->sk_type;
705 
706 	__unix_remove_socket(sk);
707 	u->addr = addr;
708 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
709 	spin_unlock(&unix_table_lock);
710 	err = 0;
711 
712 out:	mutex_unlock(&u->readlock);
713 	return err;
714 }
715 
716 static struct sock *unix_find_other(struct net *net,
717 				    struct sockaddr_un *sunname, int len,
718 				    int type, unsigned hash, int *error)
719 {
720 	struct sock *u;
721 	struct path path;
722 	int err = 0;
723 
724 	if (sunname->sun_path[0]) {
725 		struct inode *inode;
726 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
727 		if (err)
728 			goto fail;
729 		inode = path.dentry->d_inode;
730 		err = inode_permission(inode, MAY_WRITE);
731 		if (err)
732 			goto put_fail;
733 
734 		err = -ECONNREFUSED;
735 		if (!S_ISSOCK(inode->i_mode))
736 			goto put_fail;
737 		u = unix_find_socket_byinode(net, inode);
738 		if (!u)
739 			goto put_fail;
740 
741 		if (u->sk_type == type)
742 			touch_atime(path.mnt, path.dentry);
743 
744 		path_put(&path);
745 
746 		err = -EPROTOTYPE;
747 		if (u->sk_type != type) {
748 			sock_put(u);
749 			goto fail;
750 		}
751 	} else {
752 		err = -ECONNREFUSED;
753 		u = unix_find_socket_byname(net, sunname, len, type, hash);
754 		if (u) {
755 			struct dentry *dentry;
756 			dentry = unix_sk(u)->dentry;
757 			if (dentry)
758 				touch_atime(unix_sk(u)->mnt, dentry);
759 		} else
760 			goto fail;
761 	}
762 	return u;
763 
764 put_fail:
765 	path_put(&path);
766 fail:
767 	*error = err;
768 	return NULL;
769 }
770 
771 
772 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773 {
774 	struct sock *sk = sock->sk;
775 	struct net *net = sock_net(sk);
776 	struct unix_sock *u = unix_sk(sk);
777 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
778 	struct dentry *dentry = NULL;
779 	struct nameidata nd;
780 	int err;
781 	unsigned hash;
782 	struct unix_address *addr;
783 	struct hlist_head *list;
784 
785 	err = -EINVAL;
786 	if (sunaddr->sun_family != AF_UNIX)
787 		goto out;
788 
789 	if (addr_len == sizeof(short)) {
790 		err = unix_autobind(sock);
791 		goto out;
792 	}
793 
794 	err = unix_mkname(sunaddr, addr_len, &hash);
795 	if (err < 0)
796 		goto out;
797 	addr_len = err;
798 
799 	mutex_lock(&u->readlock);
800 
801 	err = -EINVAL;
802 	if (u->addr)
803 		goto out_up;
804 
805 	err = -ENOMEM;
806 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807 	if (!addr)
808 		goto out_up;
809 
810 	memcpy(addr->name, sunaddr, addr_len);
811 	addr->len = addr_len;
812 	addr->hash = hash ^ sk->sk_type;
813 	atomic_set(&addr->refcnt, 1);
814 
815 	if (sunaddr->sun_path[0]) {
816 		unsigned int mode;
817 		err = 0;
818 		/*
819 		 * Get the parent directory, calculate the hash for last
820 		 * component.
821 		 */
822 		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823 		if (err)
824 			goto out_mknod_parent;
825 
826 		dentry = lookup_create(&nd, 0);
827 		err = PTR_ERR(dentry);
828 		if (IS_ERR(dentry))
829 			goto out_mknod_unlock;
830 
831 		/*
832 		 * All right, let's create it.
833 		 */
834 		mode = S_IFSOCK |
835 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
836 		err = mnt_want_write(nd.path.mnt);
837 		if (err)
838 			goto out_mknod_dput;
839 		err = security_path_mknod(&nd.path, dentry, mode, 0);
840 		if (err)
841 			goto out_mknod_drop_write;
842 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
843 out_mknod_drop_write:
844 		mnt_drop_write(nd.path.mnt);
845 		if (err)
846 			goto out_mknod_dput;
847 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
848 		dput(nd.path.dentry);
849 		nd.path.dentry = dentry;
850 
851 		addr->hash = UNIX_HASH_SIZE;
852 	}
853 
854 	spin_lock(&unix_table_lock);
855 
856 	if (!sunaddr->sun_path[0]) {
857 		err = -EADDRINUSE;
858 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
859 					      sk->sk_type, hash)) {
860 			unix_release_addr(addr);
861 			goto out_unlock;
862 		}
863 
864 		list = &unix_socket_table[addr->hash];
865 	} else {
866 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
867 		u->dentry = nd.path.dentry;
868 		u->mnt    = nd.path.mnt;
869 	}
870 
871 	err = 0;
872 	__unix_remove_socket(sk);
873 	u->addr = addr;
874 	__unix_insert_socket(list, sk);
875 
876 out_unlock:
877 	spin_unlock(&unix_table_lock);
878 out_up:
879 	mutex_unlock(&u->readlock);
880 out:
881 	return err;
882 
883 out_mknod_dput:
884 	dput(dentry);
885 out_mknod_unlock:
886 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
887 	path_put(&nd.path);
888 out_mknod_parent:
889 	if (err == -EEXIST)
890 		err = -EADDRINUSE;
891 	unix_release_addr(addr);
892 	goto out_up;
893 }
894 
895 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
896 {
897 	if (unlikely(sk1 == sk2) || !sk2) {
898 		unix_state_lock(sk1);
899 		return;
900 	}
901 	if (sk1 < sk2) {
902 		unix_state_lock(sk1);
903 		unix_state_lock_nested(sk2);
904 	} else {
905 		unix_state_lock(sk2);
906 		unix_state_lock_nested(sk1);
907 	}
908 }
909 
910 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
911 {
912 	if (unlikely(sk1 == sk2) || !sk2) {
913 		unix_state_unlock(sk1);
914 		return;
915 	}
916 	unix_state_unlock(sk1);
917 	unix_state_unlock(sk2);
918 }
919 
920 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
921 			      int alen, int flags)
922 {
923 	struct sock *sk = sock->sk;
924 	struct net *net = sock_net(sk);
925 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
926 	struct sock *other;
927 	unsigned hash;
928 	int err;
929 
930 	if (addr->sa_family != AF_UNSPEC) {
931 		err = unix_mkname(sunaddr, alen, &hash);
932 		if (err < 0)
933 			goto out;
934 		alen = err;
935 
936 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
937 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
938 			goto out;
939 
940 restart:
941 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
942 		if (!other)
943 			goto out;
944 
945 		unix_state_double_lock(sk, other);
946 
947 		/* Apparently VFS overslept socket death. Retry. */
948 		if (sock_flag(other, SOCK_DEAD)) {
949 			unix_state_double_unlock(sk, other);
950 			sock_put(other);
951 			goto restart;
952 		}
953 
954 		err = -EPERM;
955 		if (!unix_may_send(sk, other))
956 			goto out_unlock;
957 
958 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
959 		if (err)
960 			goto out_unlock;
961 
962 	} else {
963 		/*
964 		 *	1003.1g breaking connected state with AF_UNSPEC
965 		 */
966 		other = NULL;
967 		unix_state_double_lock(sk, other);
968 	}
969 
970 	/*
971 	 * If it was connected, reconnect.
972 	 */
973 	if (unix_peer(sk)) {
974 		struct sock *old_peer = unix_peer(sk);
975 		unix_peer(sk) = other;
976 		unix_state_double_unlock(sk, other);
977 
978 		if (other != old_peer)
979 			unix_dgram_disconnected(sk, old_peer);
980 		sock_put(old_peer);
981 	} else {
982 		unix_peer(sk) = other;
983 		unix_state_double_unlock(sk, other);
984 	}
985 	return 0;
986 
987 out_unlock:
988 	unix_state_double_unlock(sk, other);
989 	sock_put(other);
990 out:
991 	return err;
992 }
993 
994 static long unix_wait_for_peer(struct sock *other, long timeo)
995 {
996 	struct unix_sock *u = unix_sk(other);
997 	int sched;
998 	DEFINE_WAIT(wait);
999 
1000 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001 
1002 	sched = !sock_flag(other, SOCK_DEAD) &&
1003 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1004 		unix_recvq_full(other);
1005 
1006 	unix_state_unlock(other);
1007 
1008 	if (sched)
1009 		timeo = schedule_timeout(timeo);
1010 
1011 	finish_wait(&u->peer_wait, &wait);
1012 	return timeo;
1013 }
1014 
1015 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1016 			       int addr_len, int flags)
1017 {
1018 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1019 	struct sock *sk = sock->sk;
1020 	struct net *net = sock_net(sk);
1021 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1022 	struct sock *newsk = NULL;
1023 	struct sock *other = NULL;
1024 	struct sk_buff *skb = NULL;
1025 	unsigned hash;
1026 	int st;
1027 	int err;
1028 	long timeo;
1029 
1030 	err = unix_mkname(sunaddr, addr_len, &hash);
1031 	if (err < 0)
1032 		goto out;
1033 	addr_len = err;
1034 
1035 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1036 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1037 		goto out;
1038 
1039 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1040 
1041 	/* First of all allocate resources.
1042 	   If we will make it after state is locked,
1043 	   we will have to recheck all again in any case.
1044 	 */
1045 
1046 	err = -ENOMEM;
1047 
1048 	/* create new sock for complete connection */
1049 	newsk = unix_create1(sock_net(sk), NULL);
1050 	if (newsk == NULL)
1051 		goto out;
1052 
1053 	/* Allocate skb for sending to listening sock */
1054 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1055 	if (skb == NULL)
1056 		goto out;
1057 
1058 restart:
1059 	/*  Find listening sock. */
1060 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1061 	if (!other)
1062 		goto out;
1063 
1064 	/* Latch state of peer */
1065 	unix_state_lock(other);
1066 
1067 	/* Apparently VFS overslept socket death. Retry. */
1068 	if (sock_flag(other, SOCK_DEAD)) {
1069 		unix_state_unlock(other);
1070 		sock_put(other);
1071 		goto restart;
1072 	}
1073 
1074 	err = -ECONNREFUSED;
1075 	if (other->sk_state != TCP_LISTEN)
1076 		goto out_unlock;
1077 	if (other->sk_shutdown & RCV_SHUTDOWN)
1078 		goto out_unlock;
1079 
1080 	if (unix_recvq_full(other)) {
1081 		err = -EAGAIN;
1082 		if (!timeo)
1083 			goto out_unlock;
1084 
1085 		timeo = unix_wait_for_peer(other, timeo);
1086 
1087 		err = sock_intr_errno(timeo);
1088 		if (signal_pending(current))
1089 			goto out;
1090 		sock_put(other);
1091 		goto restart;
1092 	}
1093 
1094 	/* Latch our state.
1095 
1096 	   It is tricky place. We need to grab write lock and cannot
1097 	   drop lock on peer. It is dangerous because deadlock is
1098 	   possible. Connect to self case and simultaneous
1099 	   attempt to connect are eliminated by checking socket
1100 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1101 	   check this before attempt to grab lock.
1102 
1103 	   Well, and we have to recheck the state after socket locked.
1104 	 */
1105 	st = sk->sk_state;
1106 
1107 	switch (st) {
1108 	case TCP_CLOSE:
1109 		/* This is ok... continue with connect */
1110 		break;
1111 	case TCP_ESTABLISHED:
1112 		/* Socket is already connected */
1113 		err = -EISCONN;
1114 		goto out_unlock;
1115 	default:
1116 		err = -EINVAL;
1117 		goto out_unlock;
1118 	}
1119 
1120 	unix_state_lock_nested(sk);
1121 
1122 	if (sk->sk_state != st) {
1123 		unix_state_unlock(sk);
1124 		unix_state_unlock(other);
1125 		sock_put(other);
1126 		goto restart;
1127 	}
1128 
1129 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1130 	if (err) {
1131 		unix_state_unlock(sk);
1132 		goto out_unlock;
1133 	}
1134 
1135 	/* The way is open! Fastly set all the necessary fields... */
1136 
1137 	sock_hold(sk);
1138 	unix_peer(newsk)	= sk;
1139 	newsk->sk_state		= TCP_ESTABLISHED;
1140 	newsk->sk_type		= sk->sk_type;
1141 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
1142 	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
1143 	newu = unix_sk(newsk);
1144 	newsk->sk_sleep		= &newu->peer_wait;
1145 	otheru = unix_sk(other);
1146 
1147 	/* copy address information from listening to new sock*/
1148 	if (otheru->addr) {
1149 		atomic_inc(&otheru->addr->refcnt);
1150 		newu->addr = otheru->addr;
1151 	}
1152 	if (otheru->dentry) {
1153 		newu->dentry	= dget(otheru->dentry);
1154 		newu->mnt	= mntget(otheru->mnt);
1155 	}
1156 
1157 	/* Set credentials */
1158 	sk->sk_peercred = other->sk_peercred;
1159 
1160 	sock->state	= SS_CONNECTED;
1161 	sk->sk_state	= TCP_ESTABLISHED;
1162 	sock_hold(newsk);
1163 
1164 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1165 	unix_peer(sk)	= newsk;
1166 
1167 	unix_state_unlock(sk);
1168 
1169 	/* take ten and and send info to listening sock */
1170 	spin_lock(&other->sk_receive_queue.lock);
1171 	__skb_queue_tail(&other->sk_receive_queue, skb);
1172 	spin_unlock(&other->sk_receive_queue.lock);
1173 	unix_state_unlock(other);
1174 	other->sk_data_ready(other, 0);
1175 	sock_put(other);
1176 	return 0;
1177 
1178 out_unlock:
1179 	if (other)
1180 		unix_state_unlock(other);
1181 
1182 out:
1183 	kfree_skb(skb);
1184 	if (newsk)
1185 		unix_release_sock(newsk, 0);
1186 	if (other)
1187 		sock_put(other);
1188 	return err;
1189 }
1190 
1191 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1192 {
1193 	struct sock *ska = socka->sk, *skb = sockb->sk;
1194 
1195 	/* Join our sockets back to back */
1196 	sock_hold(ska);
1197 	sock_hold(skb);
1198 	unix_peer(ska) = skb;
1199 	unix_peer(skb) = ska;
1200 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1201 	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1202 	ska->sk_peercred.uid = skb->sk_peercred.uid;
1203 	ska->sk_peercred.gid = skb->sk_peercred.gid;
1204 
1205 	if (ska->sk_type != SOCK_DGRAM) {
1206 		ska->sk_state = TCP_ESTABLISHED;
1207 		skb->sk_state = TCP_ESTABLISHED;
1208 		socka->state  = SS_CONNECTED;
1209 		sockb->state  = SS_CONNECTED;
1210 	}
1211 	return 0;
1212 }
1213 
1214 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1215 {
1216 	struct sock *sk = sock->sk;
1217 	struct sock *tsk;
1218 	struct sk_buff *skb;
1219 	int err;
1220 
1221 	err = -EOPNOTSUPP;
1222 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1223 		goto out;
1224 
1225 	err = -EINVAL;
1226 	if (sk->sk_state != TCP_LISTEN)
1227 		goto out;
1228 
1229 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1230 	 * so that no locks are necessary.
1231 	 */
1232 
1233 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1234 	if (!skb) {
1235 		/* This means receive shutdown. */
1236 		if (err == 0)
1237 			err = -EINVAL;
1238 		goto out;
1239 	}
1240 
1241 	tsk = skb->sk;
1242 	skb_free_datagram(sk, skb);
1243 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1244 
1245 	/* attach accepted sock to socket */
1246 	unix_state_lock(tsk);
1247 	newsock->state = SS_CONNECTED;
1248 	sock_graft(tsk, newsock);
1249 	unix_state_unlock(tsk);
1250 	return 0;
1251 
1252 out:
1253 	return err;
1254 }
1255 
1256 
1257 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1258 {
1259 	struct sock *sk = sock->sk;
1260 	struct unix_sock *u;
1261 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1262 	int err = 0;
1263 
1264 	if (peer) {
1265 		sk = unix_peer_get(sk);
1266 
1267 		err = -ENOTCONN;
1268 		if (!sk)
1269 			goto out;
1270 		err = 0;
1271 	} else {
1272 		sock_hold(sk);
1273 	}
1274 
1275 	u = unix_sk(sk);
1276 	unix_state_lock(sk);
1277 	if (!u->addr) {
1278 		sunaddr->sun_family = AF_UNIX;
1279 		sunaddr->sun_path[0] = 0;
1280 		*uaddr_len = sizeof(short);
1281 	} else {
1282 		struct unix_address *addr = u->addr;
1283 
1284 		*uaddr_len = addr->len;
1285 		memcpy(sunaddr, addr->name, *uaddr_len);
1286 	}
1287 	unix_state_unlock(sk);
1288 	sock_put(sk);
1289 out:
1290 	return err;
1291 }
1292 
1293 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1294 {
1295 	int i;
1296 
1297 	scm->fp = UNIXCB(skb).fp;
1298 	skb->destructor = sock_wfree;
1299 	UNIXCB(skb).fp = NULL;
1300 
1301 	for (i = scm->fp->count-1; i >= 0; i--)
1302 		unix_notinflight(scm->fp->fp[i]);
1303 }
1304 
1305 static void unix_destruct_fds(struct sk_buff *skb)
1306 {
1307 	struct scm_cookie scm;
1308 	memset(&scm, 0, sizeof(scm));
1309 	unix_detach_fds(&scm, skb);
1310 
1311 	/* Alas, it calls VFS */
1312 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1313 	scm_destroy(&scm);
1314 	sock_wfree(skb);
1315 }
1316 
1317 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1318 {
1319 	int i;
1320 
1321 	/*
1322 	 * Need to duplicate file references for the sake of garbage
1323 	 * collection.  Otherwise a socket in the fps might become a
1324 	 * candidate for GC while the skb is not yet queued.
1325 	 */
1326 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1327 	if (!UNIXCB(skb).fp)
1328 		return -ENOMEM;
1329 
1330 	for (i = scm->fp->count-1; i >= 0; i--)
1331 		unix_inflight(scm->fp->fp[i]);
1332 	skb->destructor = unix_destruct_fds;
1333 	return 0;
1334 }
1335 
1336 /*
1337  *	Send AF_UNIX data.
1338  */
1339 
1340 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1341 			      struct msghdr *msg, size_t len)
1342 {
1343 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1344 	struct sock *sk = sock->sk;
1345 	struct net *net = sock_net(sk);
1346 	struct unix_sock *u = unix_sk(sk);
1347 	struct sockaddr_un *sunaddr = msg->msg_name;
1348 	struct sock *other = NULL;
1349 	int namelen = 0; /* fake GCC */
1350 	int err;
1351 	unsigned hash;
1352 	struct sk_buff *skb;
1353 	long timeo;
1354 	struct scm_cookie tmp_scm;
1355 
1356 	if (NULL == siocb->scm)
1357 		siocb->scm = &tmp_scm;
1358 	wait_for_unix_gc();
1359 	err = scm_send(sock, msg, siocb->scm);
1360 	if (err < 0)
1361 		return err;
1362 
1363 	err = -EOPNOTSUPP;
1364 	if (msg->msg_flags&MSG_OOB)
1365 		goto out;
1366 
1367 	if (msg->msg_namelen) {
1368 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1369 		if (err < 0)
1370 			goto out;
1371 		namelen = err;
1372 	} else {
1373 		sunaddr = NULL;
1374 		err = -ENOTCONN;
1375 		other = unix_peer_get(sk);
1376 		if (!other)
1377 			goto out;
1378 	}
1379 
1380 	if (test_bit(SOCK_PASSCRED, &sock->flags)
1381 		&& !u->addr && (err = unix_autobind(sock)) != 0)
1382 		goto out;
1383 
1384 	err = -EMSGSIZE;
1385 	if (len > sk->sk_sndbuf - 32)
1386 		goto out;
1387 
1388 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1389 	if (skb == NULL)
1390 		goto out;
1391 
1392 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1393 	if (siocb->scm->fp) {
1394 		err = unix_attach_fds(siocb->scm, skb);
1395 		if (err)
1396 			goto out_free;
1397 	}
1398 	unix_get_secdata(siocb->scm, skb);
1399 
1400 	skb_reset_transport_header(skb);
1401 	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1402 	if (err)
1403 		goto out_free;
1404 
1405 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1406 
1407 restart:
1408 	if (!other) {
1409 		err = -ECONNRESET;
1410 		if (sunaddr == NULL)
1411 			goto out_free;
1412 
1413 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1414 					hash, &err);
1415 		if (other == NULL)
1416 			goto out_free;
1417 	}
1418 
1419 	unix_state_lock(other);
1420 	err = -EPERM;
1421 	if (!unix_may_send(sk, other))
1422 		goto out_unlock;
1423 
1424 	if (sock_flag(other, SOCK_DEAD)) {
1425 		/*
1426 		 *	Check with 1003.1g - what should
1427 		 *	datagram error
1428 		 */
1429 		unix_state_unlock(other);
1430 		sock_put(other);
1431 
1432 		err = 0;
1433 		unix_state_lock(sk);
1434 		if (unix_peer(sk) == other) {
1435 			unix_peer(sk) = NULL;
1436 			unix_state_unlock(sk);
1437 
1438 			unix_dgram_disconnected(sk, other);
1439 			sock_put(other);
1440 			err = -ECONNREFUSED;
1441 		} else {
1442 			unix_state_unlock(sk);
1443 		}
1444 
1445 		other = NULL;
1446 		if (err)
1447 			goto out_free;
1448 		goto restart;
1449 	}
1450 
1451 	err = -EPIPE;
1452 	if (other->sk_shutdown & RCV_SHUTDOWN)
1453 		goto out_unlock;
1454 
1455 	if (sk->sk_type != SOCK_SEQPACKET) {
1456 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1457 		if (err)
1458 			goto out_unlock;
1459 	}
1460 
1461 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1462 		if (!timeo) {
1463 			err = -EAGAIN;
1464 			goto out_unlock;
1465 		}
1466 
1467 		timeo = unix_wait_for_peer(other, timeo);
1468 
1469 		err = sock_intr_errno(timeo);
1470 		if (signal_pending(current))
1471 			goto out_free;
1472 
1473 		goto restart;
1474 	}
1475 
1476 	skb_queue_tail(&other->sk_receive_queue, skb);
1477 	unix_state_unlock(other);
1478 	other->sk_data_ready(other, len);
1479 	sock_put(other);
1480 	scm_destroy(siocb->scm);
1481 	return len;
1482 
1483 out_unlock:
1484 	unix_state_unlock(other);
1485 out_free:
1486 	kfree_skb(skb);
1487 out:
1488 	if (other)
1489 		sock_put(other);
1490 	scm_destroy(siocb->scm);
1491 	return err;
1492 }
1493 
1494 
1495 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1496 			       struct msghdr *msg, size_t len)
1497 {
1498 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1499 	struct sock *sk = sock->sk;
1500 	struct sock *other = NULL;
1501 	struct sockaddr_un *sunaddr = msg->msg_name;
1502 	int err, size;
1503 	struct sk_buff *skb;
1504 	int sent = 0;
1505 	struct scm_cookie tmp_scm;
1506 	bool fds_sent = false;
1507 
1508 	if (NULL == siocb->scm)
1509 		siocb->scm = &tmp_scm;
1510 	wait_for_unix_gc();
1511 	err = scm_send(sock, msg, siocb->scm);
1512 	if (err < 0)
1513 		return err;
1514 
1515 	err = -EOPNOTSUPP;
1516 	if (msg->msg_flags&MSG_OOB)
1517 		goto out_err;
1518 
1519 	if (msg->msg_namelen) {
1520 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1521 		goto out_err;
1522 	} else {
1523 		sunaddr = NULL;
1524 		err = -ENOTCONN;
1525 		other = unix_peer(sk);
1526 		if (!other)
1527 			goto out_err;
1528 	}
1529 
1530 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1531 		goto pipe_err;
1532 
1533 	while (sent < len) {
1534 		/*
1535 		 *	Optimisation for the fact that under 0.01% of X
1536 		 *	messages typically need breaking up.
1537 		 */
1538 
1539 		size = len-sent;
1540 
1541 		/* Keep two messages in the pipe so it schedules better */
1542 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1543 			size = (sk->sk_sndbuf >> 1) - 64;
1544 
1545 		if (size > SKB_MAX_ALLOC)
1546 			size = SKB_MAX_ALLOC;
1547 
1548 		/*
1549 		 *	Grab a buffer
1550 		 */
1551 
1552 		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1553 					  &err);
1554 
1555 		if (skb == NULL)
1556 			goto out_err;
1557 
1558 		/*
1559 		 *	If you pass two values to the sock_alloc_send_skb
1560 		 *	it tries to grab the large buffer with GFP_NOFS
1561 		 *	(which can fail easily), and if it fails grab the
1562 		 *	fallback size buffer which is under a page and will
1563 		 *	succeed. [Alan]
1564 		 */
1565 		size = min_t(int, size, skb_tailroom(skb));
1566 
1567 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1568 		/* Only send the fds in the first buffer */
1569 		if (siocb->scm->fp && !fds_sent) {
1570 			err = unix_attach_fds(siocb->scm, skb);
1571 			if (err) {
1572 				kfree_skb(skb);
1573 				goto out_err;
1574 			}
1575 			fds_sent = true;
1576 		}
1577 
1578 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1579 		if (err) {
1580 			kfree_skb(skb);
1581 			goto out_err;
1582 		}
1583 
1584 		unix_state_lock(other);
1585 
1586 		if (sock_flag(other, SOCK_DEAD) ||
1587 		    (other->sk_shutdown & RCV_SHUTDOWN))
1588 			goto pipe_err_free;
1589 
1590 		skb_queue_tail(&other->sk_receive_queue, skb);
1591 		unix_state_unlock(other);
1592 		other->sk_data_ready(other, size);
1593 		sent += size;
1594 	}
1595 
1596 	scm_destroy(siocb->scm);
1597 	siocb->scm = NULL;
1598 
1599 	return sent;
1600 
1601 pipe_err_free:
1602 	unix_state_unlock(other);
1603 	kfree_skb(skb);
1604 pipe_err:
1605 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1606 		send_sig(SIGPIPE, current, 0);
1607 	err = -EPIPE;
1608 out_err:
1609 	scm_destroy(siocb->scm);
1610 	siocb->scm = NULL;
1611 	return sent ? : err;
1612 }
1613 
1614 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1615 				  struct msghdr *msg, size_t len)
1616 {
1617 	int err;
1618 	struct sock *sk = sock->sk;
1619 
1620 	err = sock_error(sk);
1621 	if (err)
1622 		return err;
1623 
1624 	if (sk->sk_state != TCP_ESTABLISHED)
1625 		return -ENOTCONN;
1626 
1627 	if (msg->msg_namelen)
1628 		msg->msg_namelen = 0;
1629 
1630 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1631 }
1632 
1633 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1634 {
1635 	struct unix_sock *u = unix_sk(sk);
1636 
1637 	msg->msg_namelen = 0;
1638 	if (u->addr) {
1639 		msg->msg_namelen = u->addr->len;
1640 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1641 	}
1642 }
1643 
1644 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1645 			      struct msghdr *msg, size_t size,
1646 			      int flags)
1647 {
1648 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1649 	struct scm_cookie tmp_scm;
1650 	struct sock *sk = sock->sk;
1651 	struct unix_sock *u = unix_sk(sk);
1652 	int noblock = flags & MSG_DONTWAIT;
1653 	struct sk_buff *skb;
1654 	int err;
1655 
1656 	err = -EOPNOTSUPP;
1657 	if (flags&MSG_OOB)
1658 		goto out;
1659 
1660 	msg->msg_namelen = 0;
1661 
1662 	mutex_lock(&u->readlock);
1663 
1664 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1665 	if (!skb) {
1666 		unix_state_lock(sk);
1667 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1668 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1669 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1670 			err = 0;
1671 		unix_state_unlock(sk);
1672 		goto out_unlock;
1673 	}
1674 
1675 	wake_up_interruptible_sync(&u->peer_wait);
1676 
1677 	if (msg->msg_name)
1678 		unix_copy_addr(msg, skb->sk);
1679 
1680 	if (size > skb->len)
1681 		size = skb->len;
1682 	else if (size < skb->len)
1683 		msg->msg_flags |= MSG_TRUNC;
1684 
1685 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1686 	if (err)
1687 		goto out_free;
1688 
1689 	if (!siocb->scm) {
1690 		siocb->scm = &tmp_scm;
1691 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1692 	}
1693 	siocb->scm->creds = *UNIXCREDS(skb);
1694 	unix_set_secdata(siocb->scm, skb);
1695 
1696 	if (!(flags & MSG_PEEK)) {
1697 		if (UNIXCB(skb).fp)
1698 			unix_detach_fds(siocb->scm, skb);
1699 	} else {
1700 		/* It is questionable: on PEEK we could:
1701 		   - do not return fds - good, but too simple 8)
1702 		   - return fds, and do not return them on read (old strategy,
1703 		     apparently wrong)
1704 		   - clone fds (I chose it for now, it is the most universal
1705 		     solution)
1706 
1707 		   POSIX 1003.1g does not actually define this clearly
1708 		   at all. POSIX 1003.1g doesn't define a lot of things
1709 		   clearly however!
1710 
1711 		*/
1712 		if (UNIXCB(skb).fp)
1713 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1714 	}
1715 	err = size;
1716 
1717 	scm_recv(sock, msg, siocb->scm, flags);
1718 
1719 out_free:
1720 	skb_free_datagram(sk, skb);
1721 out_unlock:
1722 	mutex_unlock(&u->readlock);
1723 out:
1724 	return err;
1725 }
1726 
1727 /*
1728  *	Sleep until data has arrive. But check for races..
1729  */
1730 
1731 static long unix_stream_data_wait(struct sock *sk, long timeo)
1732 {
1733 	DEFINE_WAIT(wait);
1734 
1735 	unix_state_lock(sk);
1736 
1737 	for (;;) {
1738 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1739 
1740 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1741 		    sk->sk_err ||
1742 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1743 		    signal_pending(current) ||
1744 		    !timeo)
1745 			break;
1746 
1747 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1748 		unix_state_unlock(sk);
1749 		timeo = schedule_timeout(timeo);
1750 		unix_state_lock(sk);
1751 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1752 	}
1753 
1754 	finish_wait(sk->sk_sleep, &wait);
1755 	unix_state_unlock(sk);
1756 	return timeo;
1757 }
1758 
1759 
1760 
1761 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1762 			       struct msghdr *msg, size_t size,
1763 			       int flags)
1764 {
1765 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1766 	struct scm_cookie tmp_scm;
1767 	struct sock *sk = sock->sk;
1768 	struct unix_sock *u = unix_sk(sk);
1769 	struct sockaddr_un *sunaddr = msg->msg_name;
1770 	int copied = 0;
1771 	int check_creds = 0;
1772 	int target;
1773 	int err = 0;
1774 	long timeo;
1775 
1776 	err = -EINVAL;
1777 	if (sk->sk_state != TCP_ESTABLISHED)
1778 		goto out;
1779 
1780 	err = -EOPNOTSUPP;
1781 	if (flags&MSG_OOB)
1782 		goto out;
1783 
1784 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1785 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1786 
1787 	msg->msg_namelen = 0;
1788 
1789 	/* Lock the socket to prevent queue disordering
1790 	 * while sleeps in memcpy_tomsg
1791 	 */
1792 
1793 	if (!siocb->scm) {
1794 		siocb->scm = &tmp_scm;
1795 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1796 	}
1797 
1798 	mutex_lock(&u->readlock);
1799 
1800 	do {
1801 		int chunk;
1802 		struct sk_buff *skb;
1803 
1804 		unix_state_lock(sk);
1805 		skb = skb_dequeue(&sk->sk_receive_queue);
1806 		if (skb == NULL) {
1807 			if (copied >= target)
1808 				goto unlock;
1809 
1810 			/*
1811 			 *	POSIX 1003.1g mandates this order.
1812 			 */
1813 
1814 			err = sock_error(sk);
1815 			if (err)
1816 				goto unlock;
1817 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1818 				goto unlock;
1819 
1820 			unix_state_unlock(sk);
1821 			err = -EAGAIN;
1822 			if (!timeo)
1823 				break;
1824 			mutex_unlock(&u->readlock);
1825 
1826 			timeo = unix_stream_data_wait(sk, timeo);
1827 
1828 			if (signal_pending(current)) {
1829 				err = sock_intr_errno(timeo);
1830 				goto out;
1831 			}
1832 			mutex_lock(&u->readlock);
1833 			continue;
1834  unlock:
1835 			unix_state_unlock(sk);
1836 			break;
1837 		}
1838 		unix_state_unlock(sk);
1839 
1840 		if (check_creds) {
1841 			/* Never glue messages from different writers */
1842 			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1843 				   sizeof(siocb->scm->creds)) != 0) {
1844 				skb_queue_head(&sk->sk_receive_queue, skb);
1845 				break;
1846 			}
1847 		} else {
1848 			/* Copy credentials */
1849 			siocb->scm->creds = *UNIXCREDS(skb);
1850 			check_creds = 1;
1851 		}
1852 
1853 		/* Copy address just once */
1854 		if (sunaddr) {
1855 			unix_copy_addr(msg, skb->sk);
1856 			sunaddr = NULL;
1857 		}
1858 
1859 		chunk = min_t(unsigned int, skb->len, size);
1860 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1861 			skb_queue_head(&sk->sk_receive_queue, skb);
1862 			if (copied == 0)
1863 				copied = -EFAULT;
1864 			break;
1865 		}
1866 		copied += chunk;
1867 		size -= chunk;
1868 
1869 		/* Mark read part of skb as used */
1870 		if (!(flags & MSG_PEEK)) {
1871 			skb_pull(skb, chunk);
1872 
1873 			if (UNIXCB(skb).fp)
1874 				unix_detach_fds(siocb->scm, skb);
1875 
1876 			/* put the skb back if we didn't use it up.. */
1877 			if (skb->len) {
1878 				skb_queue_head(&sk->sk_receive_queue, skb);
1879 				break;
1880 			}
1881 
1882 			kfree_skb(skb);
1883 
1884 			if (siocb->scm->fp)
1885 				break;
1886 		} else {
1887 			/* It is questionable, see note in unix_dgram_recvmsg.
1888 			 */
1889 			if (UNIXCB(skb).fp)
1890 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1891 
1892 			/* put message back and return */
1893 			skb_queue_head(&sk->sk_receive_queue, skb);
1894 			break;
1895 		}
1896 	} while (size);
1897 
1898 	mutex_unlock(&u->readlock);
1899 	scm_recv(sock, msg, siocb->scm, flags);
1900 out:
1901 	return copied ? : err;
1902 }
1903 
1904 static int unix_shutdown(struct socket *sock, int mode)
1905 {
1906 	struct sock *sk = sock->sk;
1907 	struct sock *other;
1908 
1909 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1910 
1911 	if (mode) {
1912 		unix_state_lock(sk);
1913 		sk->sk_shutdown |= mode;
1914 		other = unix_peer(sk);
1915 		if (other)
1916 			sock_hold(other);
1917 		unix_state_unlock(sk);
1918 		sk->sk_state_change(sk);
1919 
1920 		if (other &&
1921 			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1922 
1923 			int peer_mode = 0;
1924 
1925 			if (mode&RCV_SHUTDOWN)
1926 				peer_mode |= SEND_SHUTDOWN;
1927 			if (mode&SEND_SHUTDOWN)
1928 				peer_mode |= RCV_SHUTDOWN;
1929 			unix_state_lock(other);
1930 			other->sk_shutdown |= peer_mode;
1931 			unix_state_unlock(other);
1932 			other->sk_state_change(other);
1933 			read_lock(&other->sk_callback_lock);
1934 			if (peer_mode == SHUTDOWN_MASK)
1935 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1936 			else if (peer_mode & RCV_SHUTDOWN)
1937 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1938 			read_unlock(&other->sk_callback_lock);
1939 		}
1940 		if (other)
1941 			sock_put(other);
1942 	}
1943 	return 0;
1944 }
1945 
1946 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1947 {
1948 	struct sock *sk = sock->sk;
1949 	long amount = 0;
1950 	int err;
1951 
1952 	switch (cmd) {
1953 	case SIOCOUTQ:
1954 		amount = sk_wmem_alloc_get(sk);
1955 		err = put_user(amount, (int __user *)arg);
1956 		break;
1957 	case SIOCINQ:
1958 		{
1959 			struct sk_buff *skb;
1960 
1961 			if (sk->sk_state == TCP_LISTEN) {
1962 				err = -EINVAL;
1963 				break;
1964 			}
1965 
1966 			spin_lock(&sk->sk_receive_queue.lock);
1967 			if (sk->sk_type == SOCK_STREAM ||
1968 			    sk->sk_type == SOCK_SEQPACKET) {
1969 				skb_queue_walk(&sk->sk_receive_queue, skb)
1970 					amount += skb->len;
1971 			} else {
1972 				skb = skb_peek(&sk->sk_receive_queue);
1973 				if (skb)
1974 					amount = skb->len;
1975 			}
1976 			spin_unlock(&sk->sk_receive_queue.lock);
1977 			err = put_user(amount, (int __user *)arg);
1978 			break;
1979 		}
1980 
1981 	default:
1982 		err = -ENOIOCTLCMD;
1983 		break;
1984 	}
1985 	return err;
1986 }
1987 
1988 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1989 {
1990 	struct sock *sk = sock->sk;
1991 	unsigned int mask;
1992 
1993 	sock_poll_wait(file, sk->sk_sleep, wait);
1994 	mask = 0;
1995 
1996 	/* exceptional events? */
1997 	if (sk->sk_err)
1998 		mask |= POLLERR;
1999 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2000 		mask |= POLLHUP;
2001 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2002 		mask |= POLLRDHUP;
2003 
2004 	/* readable? */
2005 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2006 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2007 		mask |= POLLIN | POLLRDNORM;
2008 
2009 	/* Connection-based need to check for termination and startup */
2010 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2011 	    sk->sk_state == TCP_CLOSE)
2012 		mask |= POLLHUP;
2013 
2014 	/*
2015 	 * we set writable also when the other side has shut down the
2016 	 * connection. This prevents stuck sockets.
2017 	 */
2018 	if (unix_writable(sk))
2019 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2020 
2021 	return mask;
2022 }
2023 
2024 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2025 				    poll_table *wait)
2026 {
2027 	struct sock *sk = sock->sk, *other;
2028 	unsigned int mask, writable;
2029 
2030 	sock_poll_wait(file, sk->sk_sleep, wait);
2031 	mask = 0;
2032 
2033 	/* exceptional events? */
2034 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2035 		mask |= POLLERR;
2036 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2037 		mask |= POLLRDHUP;
2038 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2039 		mask |= POLLHUP;
2040 
2041 	/* readable? */
2042 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
2043 	    (sk->sk_shutdown & RCV_SHUTDOWN))
2044 		mask |= POLLIN | POLLRDNORM;
2045 
2046 	/* Connection-based need to check for termination and startup */
2047 	if (sk->sk_type == SOCK_SEQPACKET) {
2048 		if (sk->sk_state == TCP_CLOSE)
2049 			mask |= POLLHUP;
2050 		/* connection hasn't started yet? */
2051 		if (sk->sk_state == TCP_SYN_SENT)
2052 			return mask;
2053 	}
2054 
2055 	/* writable? */
2056 	writable = unix_writable(sk);
2057 	if (writable) {
2058 		other = unix_peer_get(sk);
2059 		if (other) {
2060 			if (unix_peer(other) != sk) {
2061 				sock_poll_wait(file, &unix_sk(other)->peer_wait,
2062 					  wait);
2063 				if (unix_recvq_full(other))
2064 					writable = 0;
2065 			}
2066 
2067 			sock_put(other);
2068 		}
2069 	}
2070 
2071 	if (writable)
2072 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2073 	else
2074 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2075 
2076 	return mask;
2077 }
2078 
2079 #ifdef CONFIG_PROC_FS
2080 static struct sock *first_unix_socket(int *i)
2081 {
2082 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2083 		if (!hlist_empty(&unix_socket_table[*i]))
2084 			return __sk_head(&unix_socket_table[*i]);
2085 	}
2086 	return NULL;
2087 }
2088 
2089 static struct sock *next_unix_socket(int *i, struct sock *s)
2090 {
2091 	struct sock *next = sk_next(s);
2092 	/* More in this chain? */
2093 	if (next)
2094 		return next;
2095 	/* Look for next non-empty chain. */
2096 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2097 		if (!hlist_empty(&unix_socket_table[*i]))
2098 			return __sk_head(&unix_socket_table[*i]);
2099 	}
2100 	return NULL;
2101 }
2102 
2103 struct unix_iter_state {
2104 	struct seq_net_private p;
2105 	int i;
2106 };
2107 
2108 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2109 {
2110 	struct unix_iter_state *iter = seq->private;
2111 	loff_t off = 0;
2112 	struct sock *s;
2113 
2114 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2115 		if (sock_net(s) != seq_file_net(seq))
2116 			continue;
2117 		if (off == pos)
2118 			return s;
2119 		++off;
2120 	}
2121 	return NULL;
2122 }
2123 
2124 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2125 	__acquires(unix_table_lock)
2126 {
2127 	spin_lock(&unix_table_lock);
2128 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2129 }
2130 
2131 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2132 {
2133 	struct unix_iter_state *iter = seq->private;
2134 	struct sock *sk = v;
2135 	++*pos;
2136 
2137 	if (v == SEQ_START_TOKEN)
2138 		sk = first_unix_socket(&iter->i);
2139 	else
2140 		sk = next_unix_socket(&iter->i, sk);
2141 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2142 		sk = next_unix_socket(&iter->i, sk);
2143 	return sk;
2144 }
2145 
2146 static void unix_seq_stop(struct seq_file *seq, void *v)
2147 	__releases(unix_table_lock)
2148 {
2149 	spin_unlock(&unix_table_lock);
2150 }
2151 
2152 static int unix_seq_show(struct seq_file *seq, void *v)
2153 {
2154 
2155 	if (v == SEQ_START_TOKEN)
2156 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2157 			 "Inode Path\n");
2158 	else {
2159 		struct sock *s = v;
2160 		struct unix_sock *u = unix_sk(s);
2161 		unix_state_lock(s);
2162 
2163 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2164 			s,
2165 			atomic_read(&s->sk_refcnt),
2166 			0,
2167 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2168 			s->sk_type,
2169 			s->sk_socket ?
2170 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2171 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2172 			sock_i_ino(s));
2173 
2174 		if (u->addr) {
2175 			int i, len;
2176 			seq_putc(seq, ' ');
2177 
2178 			i = 0;
2179 			len = u->addr->len - sizeof(short);
2180 			if (!UNIX_ABSTRACT(s))
2181 				len--;
2182 			else {
2183 				seq_putc(seq, '@');
2184 				i++;
2185 			}
2186 			for ( ; i < len; i++)
2187 				seq_putc(seq, u->addr->name->sun_path[i]);
2188 		}
2189 		unix_state_unlock(s);
2190 		seq_putc(seq, '\n');
2191 	}
2192 
2193 	return 0;
2194 }
2195 
2196 static const struct seq_operations unix_seq_ops = {
2197 	.start  = unix_seq_start,
2198 	.next   = unix_seq_next,
2199 	.stop   = unix_seq_stop,
2200 	.show   = unix_seq_show,
2201 };
2202 
2203 static int unix_seq_open(struct inode *inode, struct file *file)
2204 {
2205 	return seq_open_net(inode, file, &unix_seq_ops,
2206 			    sizeof(struct unix_iter_state));
2207 }
2208 
2209 static const struct file_operations unix_seq_fops = {
2210 	.owner		= THIS_MODULE,
2211 	.open		= unix_seq_open,
2212 	.read		= seq_read,
2213 	.llseek		= seq_lseek,
2214 	.release	= seq_release_net,
2215 };
2216 
2217 #endif
2218 
2219 static struct net_proto_family unix_family_ops = {
2220 	.family = PF_UNIX,
2221 	.create = unix_create,
2222 	.owner	= THIS_MODULE,
2223 };
2224 
2225 
2226 static int unix_net_init(struct net *net)
2227 {
2228 	int error = -ENOMEM;
2229 
2230 	net->unx.sysctl_max_dgram_qlen = 10;
2231 	if (unix_sysctl_register(net))
2232 		goto out;
2233 
2234 #ifdef CONFIG_PROC_FS
2235 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2236 		unix_sysctl_unregister(net);
2237 		goto out;
2238 	}
2239 #endif
2240 	error = 0;
2241 out:
2242 	return error;
2243 }
2244 
2245 static void unix_net_exit(struct net *net)
2246 {
2247 	unix_sysctl_unregister(net);
2248 	proc_net_remove(net, "unix");
2249 }
2250 
2251 static struct pernet_operations unix_net_ops = {
2252 	.init = unix_net_init,
2253 	.exit = unix_net_exit,
2254 };
2255 
2256 static int __init af_unix_init(void)
2257 {
2258 	int rc = -1;
2259 	struct sk_buff *dummy_skb;
2260 
2261 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2262 
2263 	rc = proto_register(&unix_proto, 1);
2264 	if (rc != 0) {
2265 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2266 		       __func__);
2267 		goto out;
2268 	}
2269 
2270 	sock_register(&unix_family_ops);
2271 	register_pernet_subsys(&unix_net_ops);
2272 out:
2273 	return rc;
2274 }
2275 
2276 static void __exit af_unix_exit(void)
2277 {
2278 	sock_unregister(PF_UNIX);
2279 	proto_unregister(&unix_proto);
2280 	unregister_pernet_subsys(&unix_net_ops);
2281 }
2282 
2283 /* Earlier than device_initcall() so that other drivers invoking
2284    request_module() don't end up in a loop when modprobe tries
2285    to use a UNIX socket. But later than subsys_initcall() because
2286    we depend on stuff initialised there */
2287 fs_initcall(af_unix_init);
2288 module_exit(af_unix_exit);
2289 
2290 MODULE_LICENSE("GPL");
2291 MODULE_ALIAS_NETPROTO(PF_UNIX);
2292