xref: /linux/net/unix/af_unix.c (revision 0bf2461fdd9008290cf429e50e4f362dafab4249)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko Eißfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it avoids a huge number
38  *					of hashed socks (for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skbs queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connect()ed socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
59  *		mark and a fake inode identifier (nor has the BSD first-socket fstat-twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for getsockname/getpeername - a BSD bug?)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  starting with a 0 byte, so that this name space does not
80  *		  intersect with BSD names.
81  */
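
/*
 * An illustrative userspace sketch of the two binding styles described
 * above (needs <sys/socket.h>, <sys/un.h>, <stddef.h> and friends; the
 * path and the abstract name are arbitrary placeholders):
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fs_fd  = socket(AF_UNIX, SOCK_STREAM, 0);
 *	int abs_fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	// Filesystem binding: NUL-terminated path, visible via the VFS.
 *	strcpy(a.sun_path, "/tmp/mysock");
 *	bind(fs_fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract binding: sun_path starts with a 0 byte; the name is
 *	// the raw byte sequence, delimited by the passed length.
 *	memset(a.sun_path, 0, sizeof(a.sun_path));
 *	memcpy(a.sun_path + 1, "name", 4);
 *	bind(abs_fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */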
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119 static DEFINE_SPINLOCK(unix_table_lock);
120 static atomic_long_t unix_nr_socks;
121 
122 #define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
123 
124 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125 
126 #ifdef CONFIG_SECURITY_NETWORK
127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
128 {
129 	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
130 }
131 
132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
133 {
134 	scm->secid = *UNIXSID(skb);
135 }
136 #else
137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
138 { }
139 
140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 { }
142 #endif /* CONFIG_SECURITY_NETWORK */
143 
144 /*
145  *  SMP locking strategy:
146  *    the hash table is protected by the spinlock unix_table_lock,
147  *    each socket's state is protected by a separate spin lock.
148  */
149 
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152 	unsigned hash = (__force unsigned)n;
153 	hash ^= hash>>16;
154 	hash ^= hash>>8;
155 	return hash&(UNIX_HASH_SIZE-1);
156 }
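
/*
 * A worked example of the fold, assuming UNIX_HASH_SIZE is 256 (so the
 * mask is 0xff):
 *
 *	n           = 0x12345678
 *	hash        = 0x12345678 ^ (0x12345678 >> 16) = 0x1234444c
 *	hash        = 0x1234444c ^ (0x1234444c >> 8)  = 0x12267008
 *	hash & 0xff                                   = 0x08
 *
 * Every byte of the checksum thus influences the final bucket index.
 */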
157 
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159 
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162 	return unix_peer(osk) == sk;
163 }
164 
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
168 }
169 
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
173 }
174 
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177 	struct sock *peer;
178 
179 	unix_state_lock(s);
180 	peer = unix_peer(s);
181 	if (peer)
182 		sock_hold(peer);
183 	unix_state_unlock(s);
184 	return peer;
185 }
186 
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189 	if (atomic_dec_and_test(&addr->refcnt))
190 		kfree(addr);
191 }
192 
193 /*
194  *	Check unix socket name:
195  *		- it should not be zero length.
196  *		- if it does not start with a zero byte, it should be NUL terminated (an FS object).
197  *		- if it starts with a zero byte, it is an abstract name.
198  */
199 
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
202 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
203 		return -EINVAL;
204 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205 		return -EINVAL;
206 	if (sunaddr->sun_path[0]) {
207 		/*
208 		 * This may look like an off-by-one error but it is a bit more
209 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 		 * sun_path[108] doesn't as such exist.  However in kernel space
211 		 * we are guaranteed that it is a valid memory location in our
212 		 * kernel address buffer.
213 		 */
214 		((char *)sunaddr)[len] = 0;
215 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
216 		return len;
217 	}
218 
219 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220 	return len;
221 }
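
/*
 * A sketch of the calling convention (hypothetical caller): the
 * normalized length is returned, and *hashp is only filled in for
 * abstract names; filesystem names are hashed by inode later on.
 *
 *	unsigned hash;
 *	int len = unix_mkname(sunaddr, addr_len, &hash);
 *
 *	if (len < 0)
 *		return len;	// -EINVAL for a malformed name
 */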
222 
223 static void __unix_remove_socket(struct sock *sk)
224 {
225 	sk_del_node_init(sk);
226 }
227 
228 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
229 {
230 	WARN_ON(!sk_unhashed(sk));
231 	sk_add_node(sk, list);
232 }
233 
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236 	spin_lock(&unix_table_lock);
237 	__unix_remove_socket(sk);
238 	spin_unlock(&unix_table_lock);
239 }
240 
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	spin_lock(&unix_table_lock);
244 	__unix_insert_socket(list, sk);
245 	spin_unlock(&unix_table_lock);
246 }
247 
248 static struct sock *__unix_find_socket_byname(struct net *net,
249 					      struct sockaddr_un *sunname,
250 					      int len, int type, unsigned hash)
251 {
252 	struct sock *s;
253 	struct hlist_node *node;
254 
255 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256 		struct unix_sock *u = unix_sk(s);
257 
258 		if (!net_eq(sock_net(s), net))
259 			continue;
260 
261 		if (u->addr->len == len &&
262 		    !memcmp(u->addr->name, sunname, len))
263 			goto found;
264 	}
265 	s = NULL;
266 found:
267 	return s;
268 }
269 
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271 						   struct sockaddr_un *sunname,
272 						   int len, int type,
273 						   unsigned hash)
274 {
275 	struct sock *s;
276 
277 	spin_lock(&unix_table_lock);
278 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
279 	if (s)
280 		sock_hold(s);
281 	spin_unlock(&unix_table_lock);
282 	return s;
283 }
284 
285 static struct sock *unix_find_socket_byinode(struct inode *i)
286 {
287 	struct sock *s;
288 	struct hlist_node *node;
289 
290 	spin_lock(&unix_table_lock);
291 	sk_for_each(s, node,
292 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 		struct dentry *dentry = unix_sk(s)->dentry;
294 
295 		if (dentry && dentry->d_inode == i) {
296 			sock_hold(s);
297 			goto found;
298 		}
299 	}
300 	s = NULL;
301 found:
302 	spin_unlock(&unix_table_lock);
303 	return s;
304 }
305 
306 static inline int unix_writable(struct sock *sk)
307 {
308 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
309 }
310 
311 static void unix_write_space(struct sock *sk)
312 {
313 	struct socket_wq *wq;
314 
315 	rcu_read_lock();
316 	if (unix_writable(sk)) {
317 		wq = rcu_dereference(sk->sk_wq);
318 		if (wq_has_sleeper(wq))
319 			wake_up_interruptible_sync_poll(&wq->wait,
320 				POLLOUT | POLLWRNORM | POLLWRBAND);
321 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
322 	}
323 	rcu_read_unlock();
324 }
325 
326 /* When a dgram socket disconnects (or changes its peer), we clear its receive
327  * queue of packets that arrived from the previous peer. First, this allows us
328  * to do flow control based only on wmem_alloc; second, an sk connected to a
329  * peer may receive messages only from that peer. */
330 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
331 {
332 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
333 		skb_queue_purge(&sk->sk_receive_queue);
334 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
335 
336 		/* If one link of a bidirectional dgram pipe is disconnected,
337 		 * we signal an error. Messages are lost. Do not do this
338 		 * when the peer was not connected to us.
339 		 */
340 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
341 			other->sk_err = ECONNRESET;
342 			other->sk_error_report(other);
343 		}
344 	}
345 }
346 
347 static void unix_sock_destructor(struct sock *sk)
348 {
349 	struct unix_sock *u = unix_sk(sk);
350 
351 	skb_queue_purge(&sk->sk_receive_queue);
352 
353 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
354 	WARN_ON(!sk_unhashed(sk));
355 	WARN_ON(sk->sk_socket);
356 	if (!sock_flag(sk, SOCK_DEAD)) {
357 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
358 		return;
359 	}
360 
361 	if (u->addr)
362 		unix_release_addr(u->addr);
363 
364 	atomic_long_dec(&unix_nr_socks);
365 	local_bh_disable();
366 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
367 	local_bh_enable();
368 #ifdef UNIX_REFCNT_DEBUG
369 	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
370 		atomic_long_read(&unix_nr_socks));
371 #endif
372 }
373 
374 static int unix_release_sock(struct sock *sk, int embrion)
375 {
376 	struct unix_sock *u = unix_sk(sk);
377 	struct dentry *dentry;
378 	struct vfsmount *mnt;
379 	struct sock *skpair;
380 	struct sk_buff *skb;
381 	int state;
382 
383 	unix_remove_socket(sk);
384 
385 	/* Clear state */
386 	unix_state_lock(sk);
387 	sock_orphan(sk);
388 	sk->sk_shutdown = SHUTDOWN_MASK;
389 	dentry	     = u->dentry;
390 	u->dentry    = NULL;
391 	mnt	     = u->mnt;
392 	u->mnt	     = NULL;
393 	state = sk->sk_state;
394 	sk->sk_state = TCP_CLOSE;
395 	unix_state_unlock(sk);
396 
397 	wake_up_interruptible_all(&u->peer_wait);
398 
399 	skpair = unix_peer(sk);
400 
401 	if (skpair != NULL) {
402 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
403 			unix_state_lock(skpair);
404 			/* No more writes */
405 			skpair->sk_shutdown = SHUTDOWN_MASK;
406 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
407 				skpair->sk_err = ECONNRESET;
408 			unix_state_unlock(skpair);
409 			skpair->sk_state_change(skpair);
410 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411 		}
412 		sock_put(skpair); /* It may now die */
413 		unix_peer(sk) = NULL;
414 	}
415 
416 	/* Try to flush out this socket. Throw out buffers at least */
417 
418 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
419 		if (state == TCP_LISTEN)
420 			unix_release_sock(skb->sk, 1);
421 		/* passed fds are erased in the kfree_skb hook	      */
422 		kfree_skb(skb);
423 	}
424 
425 	if (dentry) {
426 		dput(dentry);
427 		mntput(mnt);
428 	}
429 
430 	sock_put(sk);
431 
432 	/* ---- Socket is dead now and most probably destroyed ---- */
433 
434 	/*
435 	 * Fixme: BSD difference: In BSD all sockets connected to us get
436 	 *	  ECONNRESET and we die on the spot. In Linux we behave
437 	 *	  like files and pipes do and wait for the last
438 	 *	  dereference.
439 	 *
440 	 * Can't we simply set sock->err?
441 	 *
442 	 *	  What is the above comment talking about? --ANK(980817)
443 	 */
444 
445 	if (unix_tot_inflight)
446 		unix_gc();		/* Garbage collect fds */
447 
448 	return 0;
449 }
450 
451 static void init_peercred(struct sock *sk)
452 {
453 	put_pid(sk->sk_peer_pid);
454 	if (sk->sk_peer_cred)
455 		put_cred(sk->sk_peer_cred);
456 	sk->sk_peer_pid  = get_pid(task_tgid(current));
457 	sk->sk_peer_cred = get_current_cred();
458 }
459 
460 static void copy_peercred(struct sock *sk, struct sock *peersk)
461 {
462 	put_pid(sk->sk_peer_pid);
463 	if (sk->sk_peer_cred)
464 		put_cred(sk->sk_peer_cred);
465 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
466 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
467 }
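
/*
 * The pid/cred captured above are what userspace later reads back with
 * SO_PEERCRED; an illustrative sketch (error handling omitted, fd is an
 * already-connected AF_UNIX socket):
 *
 *	struct ucred peer;
 *	socklen_t len = sizeof(peer);
 *
 *	getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len);
 *	printf("peer pid=%d uid=%u gid=%u\n", peer.pid, peer.uid, peer.gid);
 */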
468 
469 static int unix_listen(struct socket *sock, int backlog)
470 {
471 	int err;
472 	struct sock *sk = sock->sk;
473 	struct unix_sock *u = unix_sk(sk);
474 	struct pid *old_pid = NULL;
475 	const struct cred *old_cred = NULL;
476 
477 	err = -EOPNOTSUPP;
478 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
479 		goto out;	/* Only stream/seqpacket sockets accept */
480 	err = -EINVAL;
481 	if (!u->addr)
482 		goto out;	/* No listens on an unbound socket */
483 	unix_state_lock(sk);
484 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
485 		goto out_unlock;
486 	if (backlog > sk->sk_max_ack_backlog)
487 		wake_up_interruptible_all(&u->peer_wait);
488 	sk->sk_max_ack_backlog	= backlog;
489 	sk->sk_state		= TCP_LISTEN;
490 	/* set credentials so connect can copy them */
491 	init_peercred(sk);
492 	err = 0;
493 
494 out_unlock:
495 	unix_state_unlock(sk);
496 	put_pid(old_pid);
497 	if (old_cred)
498 		put_cred(old_cred);
499 out:
500 	return err;
501 }
502 
503 static int unix_release(struct socket *);
504 static int unix_bind(struct socket *, struct sockaddr *, int);
505 static int unix_stream_connect(struct socket *, struct sockaddr *,
506 			       int addr_len, int flags);
507 static int unix_socketpair(struct socket *, struct socket *);
508 static int unix_accept(struct socket *, struct socket *, int);
509 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
510 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
511 static unsigned int unix_dgram_poll(struct file *, struct socket *,
512 				    poll_table *);
513 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
514 static int unix_shutdown(struct socket *, int);
515 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
516 			       struct msghdr *, size_t);
517 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
518 			       struct msghdr *, size_t, int);
519 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
520 			      struct msghdr *, size_t);
521 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
522 			      struct msghdr *, size_t, int);
523 static int unix_dgram_connect(struct socket *, struct sockaddr *,
524 			      int, int);
525 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
526 				  struct msghdr *, size_t);
527 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
528 				  struct msghdr *, size_t, int);
529 
530 static const struct proto_ops unix_stream_ops = {
531 	.family =	PF_UNIX,
532 	.owner =	THIS_MODULE,
533 	.release =	unix_release,
534 	.bind =		unix_bind,
535 	.connect =	unix_stream_connect,
536 	.socketpair =	unix_socketpair,
537 	.accept =	unix_accept,
538 	.getname =	unix_getname,
539 	.poll =		unix_poll,
540 	.ioctl =	unix_ioctl,
541 	.listen =	unix_listen,
542 	.shutdown =	unix_shutdown,
543 	.setsockopt =	sock_no_setsockopt,
544 	.getsockopt =	sock_no_getsockopt,
545 	.sendmsg =	unix_stream_sendmsg,
546 	.recvmsg =	unix_stream_recvmsg,
547 	.mmap =		sock_no_mmap,
548 	.sendpage =	sock_no_sendpage,
549 };
550 
551 static const struct proto_ops unix_dgram_ops = {
552 	.family =	PF_UNIX,
553 	.owner =	THIS_MODULE,
554 	.release =	unix_release,
555 	.bind =		unix_bind,
556 	.connect =	unix_dgram_connect,
557 	.socketpair =	unix_socketpair,
558 	.accept =	sock_no_accept,
559 	.getname =	unix_getname,
560 	.poll =		unix_dgram_poll,
561 	.ioctl =	unix_ioctl,
562 	.listen =	sock_no_listen,
563 	.shutdown =	unix_shutdown,
564 	.setsockopt =	sock_no_setsockopt,
565 	.getsockopt =	sock_no_getsockopt,
566 	.sendmsg =	unix_dgram_sendmsg,
567 	.recvmsg =	unix_dgram_recvmsg,
568 	.mmap =		sock_no_mmap,
569 	.sendpage =	sock_no_sendpage,
570 };
571 
572 static const struct proto_ops unix_seqpacket_ops = {
573 	.family =	PF_UNIX,
574 	.owner =	THIS_MODULE,
575 	.release =	unix_release,
576 	.bind =		unix_bind,
577 	.connect =	unix_stream_connect,
578 	.socketpair =	unix_socketpair,
579 	.accept =	unix_accept,
580 	.getname =	unix_getname,
581 	.poll =		unix_dgram_poll,
582 	.ioctl =	unix_ioctl,
583 	.listen =	unix_listen,
584 	.shutdown =	unix_shutdown,
585 	.setsockopt =	sock_no_setsockopt,
586 	.getsockopt =	sock_no_getsockopt,
587 	.sendmsg =	unix_seqpacket_sendmsg,
588 	.recvmsg =	unix_seqpacket_recvmsg,
589 	.mmap =		sock_no_mmap,
590 	.sendpage =	sock_no_sendpage,
591 };
592 
593 static struct proto unix_proto = {
594 	.name			= "UNIX",
595 	.owner			= THIS_MODULE,
596 	.obj_size		= sizeof(struct unix_sock),
597 };
598 
599 /*
600  * AF_UNIX sockets do not interact with hardware, hence they
601  * don't trigger interrupts - so it's safe for them to have
602  * bh-unsafe locking for their sk_receive_queue.lock. Split off
603  * this special lock-class by reinitializing the spinlock key:
604  */
605 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
606 
607 static struct sock *unix_create1(struct net *net, struct socket *sock)
608 {
609 	struct sock *sk = NULL;
610 	struct unix_sock *u;
611 
612 	atomic_long_inc(&unix_nr_socks);
613 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
614 		goto out;
615 
616 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
617 	if (!sk)
618 		goto out;
619 
620 	sock_init_data(sock, sk);
621 	lockdep_set_class(&sk->sk_receive_queue.lock,
622 				&af_unix_sk_receive_queue_lock_key);
623 
624 	sk->sk_write_space	= unix_write_space;
625 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
626 	sk->sk_destruct		= unix_sock_destructor;
627 	u	  = unix_sk(sk);
628 	u->dentry = NULL;
629 	u->mnt	  = NULL;
630 	spin_lock_init(&u->lock);
631 	atomic_long_set(&u->inflight, 0);
632 	INIT_LIST_HEAD(&u->link);
633 	mutex_init(&u->readlock); /* single task reading lock */
634 	init_waitqueue_head(&u->peer_wait);
635 	unix_insert_socket(unix_sockets_unbound, sk);
636 out:
637 	if (sk == NULL)
638 		atomic_long_dec(&unix_nr_socks);
639 	else {
640 		local_bh_disable();
641 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
642 		local_bh_enable();
643 	}
644 	return sk;
645 }
646 
647 static int unix_create(struct net *net, struct socket *sock, int protocol,
648 		       int kern)
649 {
650 	if (protocol && protocol != PF_UNIX)
651 		return -EPROTONOSUPPORT;
652 
653 	sock->state = SS_UNCONNECTED;
654 
655 	switch (sock->type) {
656 	case SOCK_STREAM:
657 		sock->ops = &unix_stream_ops;
658 		break;
659 		/*
660 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW,
661 		 *	though nothing uses it.
662 		 */
663 	case SOCK_RAW:
664 		sock->type = SOCK_DGRAM;
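		/* fall through */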
665 	case SOCK_DGRAM:
666 		sock->ops = &unix_dgram_ops;
667 		break;
668 	case SOCK_SEQPACKET:
669 		sock->ops = &unix_seqpacket_ops;
670 		break;
671 	default:
672 		return -ESOCKTNOSUPPORT;
673 	}
674 
675 	return unix_create1(net, sock) ? 0 : -ENOMEM;
676 }
677 
678 static int unix_release(struct socket *sock)
679 {
680 	struct sock *sk = sock->sk;
681 
682 	if (!sk)
683 		return 0;
684 
685 	sock->sk = NULL;
686 
687 	return unix_release_sock(sk, 0);
688 }
689 
690 static int unix_autobind(struct socket *sock)
691 {
692 	struct sock *sk = sock->sk;
693 	struct net *net = sock_net(sk);
694 	struct unix_sock *u = unix_sk(sk);
695 	static u32 ordernum = 1;
696 	struct unix_address *addr;
697 	int err;
698 	unsigned int retries = 0;
699 
700 	mutex_lock(&u->readlock);
701 
702 	err = 0;
703 	if (u->addr)
704 		goto out;
705 
706 	err = -ENOMEM;
707 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
708 	if (!addr)
709 		goto out;
710 
711 	addr->name->sun_family = AF_UNIX;
712 	atomic_set(&addr->refcnt, 1);
713 
714 retry:
715 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
716 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
717 
718 	spin_lock(&unix_table_lock);
719 	ordernum = (ordernum+1)&0xFFFFF;
720 
721 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
722 				      addr->hash)) {
723 		spin_unlock(&unix_table_lock);
724 		/*
725 		 * __unix_find_socket_byname() may take a long time if many names
726 		 * are already in use.
727 		 */
728 		cond_resched();
729 		/* Give up if all names seem to be in use. */
730 		if (retries++ == 0xFFFFF) {
731 			err = -ENOSPC;
732 			kfree(addr);
733 			goto out;
734 		}
735 		goto retry;
736 	}
737 	addr->hash ^= sk->sk_type;
738 
739 	__unix_remove_socket(sk);
740 	u->addr = addr;
741 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
742 	spin_unlock(&unix_table_lock);
743 	err = 0;
744 
745 out:	mutex_unlock(&u->readlock);
746 	return err;
747 }
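
/*
 * Userspace can trigger this path directly by binding with only the
 * address family (addr_len == sizeof(sa_family_t)); a sketch:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 *
 * A subsequent getsockname() reports an abstract name of the form
 * "\0XXXXX" with five hex digits, as formatted by the sprintf() above.
 */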
748 
749 static struct sock *unix_find_other(struct net *net,
750 				    struct sockaddr_un *sunname, int len,
751 				    int type, unsigned hash, int *error)
752 {
753 	struct sock *u;
754 	struct path path;
755 	int err = 0;
756 
757 	if (sunname->sun_path[0]) {
758 		struct inode *inode;
759 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
760 		if (err)
761 			goto fail;
762 		inode = path.dentry->d_inode;
763 		err = inode_permission(inode, MAY_WRITE);
764 		if (err)
765 			goto put_fail;
766 
767 		err = -ECONNREFUSED;
768 		if (!S_ISSOCK(inode->i_mode))
769 			goto put_fail;
770 		u = unix_find_socket_byinode(inode);
771 		if (!u)
772 			goto put_fail;
773 
774 		if (u->sk_type == type)
775 			touch_atime(path.mnt, path.dentry);
776 
777 		path_put(&path);
778 
779 		err = -EPROTOTYPE;
780 		if (u->sk_type != type) {
781 			sock_put(u);
782 			goto fail;
783 		}
784 	} else {
785 		err = -ECONNREFUSED;
786 		u = unix_find_socket_byname(net, sunname, len, type, hash);
787 		if (u) {
788 			struct dentry *dentry;
789 			dentry = unix_sk(u)->dentry;
790 			if (dentry)
791 				touch_atime(unix_sk(u)->mnt, dentry);
792 		} else
793 			goto fail;
794 	}
795 	return u;
796 
797 put_fail:
798 	path_put(&path);
799 fail:
800 	*error = err;
801 	return NULL;
802 }
803 
804 
805 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
806 {
807 	struct sock *sk = sock->sk;
808 	struct net *net = sock_net(sk);
809 	struct unix_sock *u = unix_sk(sk);
810 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
811 	struct dentry *dentry = NULL;
812 	struct nameidata nd;
813 	int err;
814 	unsigned hash;
815 	struct unix_address *addr;
816 	struct hlist_head *list;
817 
818 	err = -EINVAL;
819 	if (sunaddr->sun_family != AF_UNIX)
820 		goto out;
821 
822 	if (addr_len == sizeof(short)) {
823 		err = unix_autobind(sock);
824 		goto out;
825 	}
826 
827 	err = unix_mkname(sunaddr, addr_len, &hash);
828 	if (err < 0)
829 		goto out;
830 	addr_len = err;
831 
832 	mutex_lock(&u->readlock);
833 
834 	err = -EINVAL;
835 	if (u->addr)
836 		goto out_up;
837 
838 	err = -ENOMEM;
839 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
840 	if (!addr)
841 		goto out_up;
842 
843 	memcpy(addr->name, sunaddr, addr_len);
844 	addr->len = addr_len;
845 	addr->hash = hash ^ sk->sk_type;
846 	atomic_set(&addr->refcnt, 1);
847 
848 	if (sunaddr->sun_path[0]) {
849 		unsigned int mode;
850 		err = 0;
851 		/*
852 		 * Get the parent directory, calculate the hash for last
853 		 * component.
854 		 */
855 		err = kern_path_parent(sunaddr->sun_path, &nd);
856 		if (err)
857 			goto out_mknod_parent;
858 
859 		dentry = lookup_create(&nd, 0);
860 		err = PTR_ERR(dentry);
861 		if (IS_ERR(dentry))
862 			goto out_mknod_unlock;
863 
864 		/*
865 		 * All right, let's create it.
866 		 */
867 		mode = S_IFSOCK |
868 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
869 		err = mnt_want_write(nd.path.mnt);
870 		if (err)
871 			goto out_mknod_dput;
872 		err = security_path_mknod(&nd.path, dentry, mode, 0);
873 		if (err)
874 			goto out_mknod_drop_write;
875 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
876 out_mknod_drop_write:
877 		mnt_drop_write(nd.path.mnt);
878 		if (err)
879 			goto out_mknod_dput;
880 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
881 		dput(nd.path.dentry);
882 		nd.path.dentry = dentry;
883 
884 		addr->hash = UNIX_HASH_SIZE;
885 	}
886 
887 	spin_lock(&unix_table_lock);
888 
889 	if (!sunaddr->sun_path[0]) {
890 		err = -EADDRINUSE;
891 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
892 					      sk->sk_type, hash)) {
893 			unix_release_addr(addr);
894 			goto out_unlock;
895 		}
896 
897 		list = &unix_socket_table[addr->hash];
898 	} else {
899 		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
900 		u->dentry = nd.path.dentry;
901 		u->mnt    = nd.path.mnt;
902 	}
903 
904 	err = 0;
905 	__unix_remove_socket(sk);
906 	u->addr = addr;
907 	__unix_insert_socket(list, sk);
908 
909 out_unlock:
910 	spin_unlock(&unix_table_lock);
911 out_up:
912 	mutex_unlock(&u->readlock);
913 out:
914 	return err;
915 
916 out_mknod_dput:
917 	dput(dentry);
918 out_mknod_unlock:
919 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
920 	path_put(&nd.path);
921 out_mknod_parent:
922 	if (err == -EEXIST)
923 		err = -EADDRINUSE;
924 	unix_release_addr(addr);
925 	goto out_up;
926 }
927 
928 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
929 {
930 	if (unlikely(sk1 == sk2) || !sk2) {
931 		unix_state_lock(sk1);
932 		return;
933 	}
934 	if (sk1 < sk2) {
935 		unix_state_lock(sk1);
936 		unix_state_lock_nested(sk2);
937 	} else {
938 		unix_state_lock(sk2);
939 		unix_state_lock_nested(sk1);
940 	}
941 }
942 
943 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
944 {
945 	if (unlikely(sk1 == sk2) || !sk2) {
946 		unix_state_unlock(sk1);
947 		return;
948 	}
949 	unix_state_unlock(sk1);
950 	unix_state_unlock(sk2);
951 }
952 
953 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
954 			      int alen, int flags)
955 {
956 	struct sock *sk = sock->sk;
957 	struct net *net = sock_net(sk);
958 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
959 	struct sock *other;
960 	unsigned hash;
961 	int err;
962 
963 	if (addr->sa_family != AF_UNSPEC) {
964 		err = unix_mkname(sunaddr, alen, &hash);
965 		if (err < 0)
966 			goto out;
967 		alen = err;
968 
969 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
970 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
971 			goto out;
972 
973 restart:
974 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
975 		if (!other)
976 			goto out;
977 
978 		unix_state_double_lock(sk, other);
979 
980 		/* Apparently VFS overslept socket death. Retry. */
981 		if (sock_flag(other, SOCK_DEAD)) {
982 			unix_state_double_unlock(sk, other);
983 			sock_put(other);
984 			goto restart;
985 		}
986 
987 		err = -EPERM;
988 		if (!unix_may_send(sk, other))
989 			goto out_unlock;
990 
991 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
992 		if (err)
993 			goto out_unlock;
994 
995 	} else {
996 		/*
997 		 *	1003.1g breaking connected state with AF_UNSPEC
998 		 */
999 		other = NULL;
1000 		unix_state_double_lock(sk, other);
1001 	}
1002 
1003 	/*
1004 	 * If it was connected, reconnect.
1005 	 */
1006 	if (unix_peer(sk)) {
1007 		struct sock *old_peer = unix_peer(sk);
1008 		unix_peer(sk) = other;
1009 		unix_state_double_unlock(sk, other);
1010 
1011 		if (other != old_peer)
1012 			unix_dgram_disconnected(sk, old_peer);
1013 		sock_put(old_peer);
1014 	} else {
1015 		unix_peer(sk) = other;
1016 		unix_state_double_unlock(sk, other);
1017 	}
1018 	return 0;
1019 
1020 out_unlock:
1021 	unix_state_double_unlock(sk, other);
1022 	sock_put(other);
1023 out:
1024 	return err;
1025 }
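
/*
 * The AF_UNSPEC branch above is the 1003.1g way for userspace to
 * dissolve a datagram association; a sketch:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));
 *
 * Afterwards the socket is unconnected again and sendto() requires an
 * explicit destination.
 */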
1026 
1027 static long unix_wait_for_peer(struct sock *other, long timeo)
1028 {
1029 	struct unix_sock *u = unix_sk(other);
1030 	int sched;
1031 	DEFINE_WAIT(wait);
1032 
1033 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1034 
1035 	sched = !sock_flag(other, SOCK_DEAD) &&
1036 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1037 		unix_recvq_full(other);
1038 
1039 	unix_state_unlock(other);
1040 
1041 	if (sched)
1042 		timeo = schedule_timeout(timeo);
1043 
1044 	finish_wait(&u->peer_wait, &wait);
1045 	return timeo;
1046 }
1047 
1048 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1049 			       int addr_len, int flags)
1050 {
1051 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1052 	struct sock *sk = sock->sk;
1053 	struct net *net = sock_net(sk);
1054 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1055 	struct sock *newsk = NULL;
1056 	struct sock *other = NULL;
1057 	struct sk_buff *skb = NULL;
1058 	unsigned hash;
1059 	int st;
1060 	int err;
1061 	long timeo;
1062 
1063 	err = unix_mkname(sunaddr, addr_len, &hash);
1064 	if (err < 0)
1065 		goto out;
1066 	addr_len = err;
1067 
1068 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1069 	    (err = unix_autobind(sock)) != 0)
1070 		goto out;
1071 
1072 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1073 
1074 	/* First of all allocate resources.
1075 	   If we do it after the state is locked,
1076 	   we will have to recheck everything again in any case.
1077 	 */
1078 
1079 	err = -ENOMEM;
1080 
1081 	/* create new sock for complete connection */
1082 	newsk = unix_create1(sock_net(sk), NULL);
1083 	if (newsk == NULL)
1084 		goto out;
1085 
1086 	/* Allocate skb for sending to listening sock */
1087 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1088 	if (skb == NULL)
1089 		goto out;
1090 
1091 restart:
1092 	/*  Find listening sock. */
1093 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1094 	if (!other)
1095 		goto out;
1096 
1097 	/* Latch state of peer */
1098 	unix_state_lock(other);
1099 
1100 	/* Apparently VFS overslept socket death. Retry. */
1101 	if (sock_flag(other, SOCK_DEAD)) {
1102 		unix_state_unlock(other);
1103 		sock_put(other);
1104 		goto restart;
1105 	}
1106 
1107 	err = -ECONNREFUSED;
1108 	if (other->sk_state != TCP_LISTEN)
1109 		goto out_unlock;
1110 	if (other->sk_shutdown & RCV_SHUTDOWN)
1111 		goto out_unlock;
1112 
1113 	if (unix_recvq_full(other)) {
1114 		err = -EAGAIN;
1115 		if (!timeo)
1116 			goto out_unlock;
1117 
1118 		timeo = unix_wait_for_peer(other, timeo);
1119 
1120 		err = sock_intr_errno(timeo);
1121 		if (signal_pending(current))
1122 			goto out;
1123 		sock_put(other);
1124 		goto restart;
1125 	}
1126 
1127 	/* Latch our state.
1128 
1129 	   It is tricky place. We need to grab our state lock and cannot
1130 	   drop lock on peer. It is dangerous because deadlock is
1131 	   possible. Connect to self case and simultaneous
1132 	   attempt to connect are eliminated by checking socket
1133 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1134 	   check this before attempt to grab lock.
1135 
1136 	   Well, and we have to recheck the state after socket locked.
1137 	 */
1138 	st = sk->sk_state;
1139 
1140 	switch (st) {
1141 	case TCP_CLOSE:
1142 		/* This is ok... continue with connect */
1143 		break;
1144 	case TCP_ESTABLISHED:
1145 		/* Socket is already connected */
1146 		err = -EISCONN;
1147 		goto out_unlock;
1148 	default:
1149 		err = -EINVAL;
1150 		goto out_unlock;
1151 	}
1152 
1153 	unix_state_lock_nested(sk);
1154 
1155 	if (sk->sk_state != st) {
1156 		unix_state_unlock(sk);
1157 		unix_state_unlock(other);
1158 		sock_put(other);
1159 		goto restart;
1160 	}
1161 
1162 	err = security_unix_stream_connect(sk, other, newsk);
1163 	if (err) {
1164 		unix_state_unlock(sk);
1165 		goto out_unlock;
1166 	}
1167 
1168 	/* The way is open! Quickly set all the necessary fields... */
1169 
1170 	sock_hold(sk);
1171 	unix_peer(newsk)	= sk;
1172 	newsk->sk_state		= TCP_ESTABLISHED;
1173 	newsk->sk_type		= sk->sk_type;
1174 	init_peercred(newsk);
1175 	newu = unix_sk(newsk);
1176 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1177 	otheru = unix_sk(other);
1178 
1179 	/* copy address information from the listening to the new sock */
1180 	if (otheru->addr) {
1181 		atomic_inc(&otheru->addr->refcnt);
1182 		newu->addr = otheru->addr;
1183 	}
1184 	if (otheru->dentry) {
1185 		newu->dentry	= dget(otheru->dentry);
1186 		newu->mnt	= mntget(otheru->mnt);
1187 	}
1188 
1189 	/* Set credentials */
1190 	copy_peercred(sk, other);
1191 
1192 	sock->state	= SS_CONNECTED;
1193 	sk->sk_state	= TCP_ESTABLISHED;
1194 	sock_hold(newsk);
1195 
1196 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1197 	unix_peer(sk)	= newsk;
1198 
1199 	unix_state_unlock(sk);
1200 
1201 	/* take ten and send info to the listening sock */
1202 	spin_lock(&other->sk_receive_queue.lock);
1203 	__skb_queue_tail(&other->sk_receive_queue, skb);
1204 	spin_unlock(&other->sk_receive_queue.lock);
1205 	unix_state_unlock(other);
1206 	other->sk_data_ready(other, 0);
1207 	sock_put(other);
1208 	return 0;
1209 
1210 out_unlock:
1211 	if (other)
1212 		unix_state_unlock(other);
1213 
1214 out:
1215 	kfree_skb(skb);
1216 	if (newsk)
1217 		unix_release_sock(newsk, 0);
1218 	if (other)
1219 		sock_put(other);
1220 	return err;
1221 }
1222 
1223 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1224 {
1225 	struct sock *ska = socka->sk, *skb = sockb->sk;
1226 
1227 	/* Join our sockets back to back */
1228 	sock_hold(ska);
1229 	sock_hold(skb);
1230 	unix_peer(ska) = skb;
1231 	unix_peer(skb) = ska;
1232 	init_peercred(ska);
1233 	init_peercred(skb);
1234 
1235 	if (ska->sk_type != SOCK_DGRAM) {
1236 		ska->sk_state = TCP_ESTABLISHED;
1237 		skb->sk_state = TCP_ESTABLISHED;
1238 		socka->state  = SS_CONNECTED;
1239 		sockb->state  = SS_CONNECTED;
1240 	}
1241 	return 0;
1242 }
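
/*
 * The back-to-back join above is what socketpair(2) ultimately
 * performs; a minimal userspace sketch:
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	write(sv[0], "ping", 4);	// readable on sv[1]
 *
 * Note that both ends get peer credentials at creation time, set by
 * init_peercred() above.
 */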
1243 
1244 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1245 {
1246 	struct sock *sk = sock->sk;
1247 	struct sock *tsk;
1248 	struct sk_buff *skb;
1249 	int err;
1250 
1251 	err = -EOPNOTSUPP;
1252 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1253 		goto out;
1254 
1255 	err = -EINVAL;
1256 	if (sk->sk_state != TCP_LISTEN)
1257 		goto out;
1258 
1259 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1260 	 * so that no locks are necessary.
1261 	 */
1262 
1263 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1264 	if (!skb) {
1265 		/* This means receive shutdown. */
1266 		if (err == 0)
1267 			err = -EINVAL;
1268 		goto out;
1269 	}
1270 
1271 	tsk = skb->sk;
1272 	skb_free_datagram(sk, skb);
1273 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1274 
1275 	/* attach accepted sock to socket */
1276 	unix_state_lock(tsk);
1277 	newsock->state = SS_CONNECTED;
1278 	sock_graft(tsk, newsock);
1279 	unix_state_unlock(tsk);
1280 	return 0;
1281 
1282 out:
1283 	return err;
1284 }
1285 
1286 
1287 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1288 {
1289 	struct sock *sk = sock->sk;
1290 	struct unix_sock *u;
1291 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1292 	int err = 0;
1293 
1294 	if (peer) {
1295 		sk = unix_peer_get(sk);
1296 
1297 		err = -ENOTCONN;
1298 		if (!sk)
1299 			goto out;
1300 		err = 0;
1301 	} else {
1302 		sock_hold(sk);
1303 	}
1304 
1305 	u = unix_sk(sk);
1306 	unix_state_lock(sk);
1307 	if (!u->addr) {
1308 		sunaddr->sun_family = AF_UNIX;
1309 		sunaddr->sun_path[0] = 0;
1310 		*uaddr_len = sizeof(short);
1311 	} else {
1312 		struct unix_address *addr = u->addr;
1313 
1314 		*uaddr_len = addr->len;
1315 		memcpy(sunaddr, addr->name, *uaddr_len);
1316 	}
1317 	unix_state_unlock(sk);
1318 	sock_put(sk);
1319 out:
1320 	return err;
1321 }
1322 
1323 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1324 {
1325 	int i;
1326 
1327 	scm->fp = UNIXCB(skb).fp;
1328 	UNIXCB(skb).fp = NULL;
1329 
1330 	for (i = scm->fp->count-1; i >= 0; i--)
1331 		unix_notinflight(scm->fp->fp[i]);
1332 }
1333 
1334 static void unix_destruct_scm(struct sk_buff *skb)
1335 {
1336 	struct scm_cookie scm;
1337 	memset(&scm, 0, sizeof(scm));
1338 	scm.pid  = UNIXCB(skb).pid;
1339 	scm.cred = UNIXCB(skb).cred;
1340 	if (UNIXCB(skb).fp)
1341 		unix_detach_fds(&scm, skb);
1342 
1343 	/* Alas, it calls VFS */
1344 	/* So fscking what? fput() has been SMP-safe since last summer */
1345 	scm_destroy(&scm);
1346 	sock_wfree(skb);
1347 }
1348 
1349 #define MAX_RECURSION_LEVEL 4
1350 
1351 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1352 {
1353 	int i;
1354 	unsigned char max_level = 0;
1355 	int unix_sock_count = 0;
1356 
1357 	for (i = scm->fp->count - 1; i >= 0; i--) {
1358 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1359 
1360 		if (sk) {
1361 			unix_sock_count++;
1362 			max_level = max(max_level,
1363 					unix_sk(sk)->recursion_level);
1364 		}
1365 	}
1366 	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1367 		return -ETOOMANYREFS;
1368 
1369 	/*
1370 	 * Need to duplicate file references for the sake of garbage
1371 	 * collection.  Otherwise a socket in the fps might become a
1372 	 * candidate for GC while the skb is not yet queued.
1373 	 */
1374 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1375 	if (!UNIXCB(skb).fp)
1376 		return -ENOMEM;
1377 
1378 	if (unix_sock_count) {
1379 		for (i = scm->fp->count - 1; i >= 0; i--)
1380 			unix_inflight(scm->fp->fp[i]);
1381 	}
1382 	return max_level;
1383 }
1384 
1385 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1386 {
1387 	int err = 0;
1388 	UNIXCB(skb).pid  = get_pid(scm->pid);
1389 	UNIXCB(skb).cred = get_cred(scm->cred);
1390 	UNIXCB(skb).fp = NULL;
1391 	if (scm->fp && send_fds)
1392 		err = unix_attach_fds(scm, skb);
1393 
1394 	skb->destructor = unix_destruct_scm;
1395 	return err;
1396 }
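
/*
 * The fds attached above arrive from userspace as an SCM_RIGHTS control
 * message; an illustrative sender, where fd is assumed to be a connected
 * AF_UNIX socket and fd_to_pass any open descriptor:
 *
 *	char data = 'x';	// one-byte carrier payload
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec io = { .iov_base = &data, .iov_len = 1 };
 *	struct msghdr msg = { .msg_iov = &io, .msg_iovlen = 1,
 *			      .msg_control = cbuf,
 *			      .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type  = SCM_RIGHTS;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *	sendmsg(fd, &msg, 0);
 */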
1397 
1398 /*
1399  *	Send AF_UNIX data.
1400  */
1401 
1402 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1403 			      struct msghdr *msg, size_t len)
1404 {
1405 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1406 	struct sock *sk = sock->sk;
1407 	struct net *net = sock_net(sk);
1408 	struct unix_sock *u = unix_sk(sk);
1409 	struct sockaddr_un *sunaddr = msg->msg_name;
1410 	struct sock *other = NULL;
1411 	int namelen = 0; /* fake GCC */
1412 	int err;
1413 	unsigned hash;
1414 	struct sk_buff *skb;
1415 	long timeo;
1416 	struct scm_cookie tmp_scm;
1417 	int max_level;
1418 
1419 	if (NULL == siocb->scm)
1420 		siocb->scm = &tmp_scm;
1421 	wait_for_unix_gc();
1422 	err = scm_send(sock, msg, siocb->scm);
1423 	if (err < 0)
1424 		return err;
1425 
1426 	err = -EOPNOTSUPP;
1427 	if (msg->msg_flags&MSG_OOB)
1428 		goto out;
1429 
1430 	if (msg->msg_namelen) {
1431 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1432 		if (err < 0)
1433 			goto out;
1434 		namelen = err;
1435 	} else {
1436 		sunaddr = NULL;
1437 		err = -ENOTCONN;
1438 		other = unix_peer_get(sk);
1439 		if (!other)
1440 			goto out;
1441 	}
1442 
1443 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1444 	    && (err = unix_autobind(sock)) != 0)
1445 		goto out;
1446 
1447 	err = -EMSGSIZE;
1448 	if (len > sk->sk_sndbuf - 32)
1449 		goto out;
1450 
1451 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1452 	if (skb == NULL)
1453 		goto out;
1454 
1455 	err = unix_scm_to_skb(siocb->scm, skb, true);
1456 	if (err < 0)
1457 		goto out_free;
1458 	max_level = err + 1;
1459 	unix_get_secdata(siocb->scm, skb);
1460 
1461 	skb_reset_transport_header(skb);
1462 	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1463 	if (err)
1464 		goto out_free;
1465 
1466 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1467 
1468 restart:
1469 	if (!other) {
1470 		err = -ECONNRESET;
1471 		if (sunaddr == NULL)
1472 			goto out_free;
1473 
1474 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1475 					hash, &err);
1476 		if (other == NULL)
1477 			goto out_free;
1478 	}
1479 
1480 	if (sk_filter(other, skb) < 0) {
1481 		/* Toss the packet but do not return any error to the sender */
1482 		err = len;
1483 		goto out_free;
1484 	}
1485 
1486 	unix_state_lock(other);
1487 	err = -EPERM;
1488 	if (!unix_may_send(sk, other))
1489 		goto out_unlock;
1490 
1491 	if (sock_flag(other, SOCK_DEAD)) {
1492 		/*
1493 		 *	Check with 1003.1g - what should a
1494 		 *	datagram error return here?
1495 		 */
1496 		unix_state_unlock(other);
1497 		sock_put(other);
1498 
1499 		err = 0;
1500 		unix_state_lock(sk);
1501 		if (unix_peer(sk) == other) {
1502 			unix_peer(sk) = NULL;
1503 			unix_state_unlock(sk);
1504 
1505 			unix_dgram_disconnected(sk, other);
1506 			sock_put(other);
1507 			err = -ECONNREFUSED;
1508 		} else {
1509 			unix_state_unlock(sk);
1510 		}
1511 
1512 		other = NULL;
1513 		if (err)
1514 			goto out_free;
1515 		goto restart;
1516 	}
1517 
1518 	err = -EPIPE;
1519 	if (other->sk_shutdown & RCV_SHUTDOWN)
1520 		goto out_unlock;
1521 
1522 	if (sk->sk_type != SOCK_SEQPACKET) {
1523 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1524 		if (err)
1525 			goto out_unlock;
1526 	}
1527 
1528 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1529 		if (!timeo) {
1530 			err = -EAGAIN;
1531 			goto out_unlock;
1532 		}
1533 
1534 		timeo = unix_wait_for_peer(other, timeo);
1535 
1536 		err = sock_intr_errno(timeo);
1537 		if (signal_pending(current))
1538 			goto out_free;
1539 
1540 		goto restart;
1541 	}
1542 
1543 	if (sock_flag(other, SOCK_RCVTSTAMP))
1544 		__net_timestamp(skb);
1545 	skb_queue_tail(&other->sk_receive_queue, skb);
1546 	if (max_level > unix_sk(other)->recursion_level)
1547 		unix_sk(other)->recursion_level = max_level;
1548 	unix_state_unlock(other);
1549 	other->sk_data_ready(other, len);
1550 	sock_put(other);
1551 	scm_destroy(siocb->scm);
1552 	return len;
1553 
1554 out_unlock:
1555 	unix_state_unlock(other);
1556 out_free:
1557 	kfree_skb(skb);
1558 out:
1559 	if (other)
1560 		sock_put(other);
1561 	scm_destroy(siocb->scm);
1562 	return err;
1563 }
1564 
1565 
1566 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1567 			       struct msghdr *msg, size_t len)
1568 {
1569 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1570 	struct sock *sk = sock->sk;
1571 	struct sock *other = NULL;
1572 	int err, size;
1573 	struct sk_buff *skb;
1574 	int sent = 0;
1575 	struct scm_cookie tmp_scm;
1576 	bool fds_sent = false;
1577 	int max_level;
1578 
1579 	if (NULL == siocb->scm)
1580 		siocb->scm = &tmp_scm;
1581 	wait_for_unix_gc();
1582 	err = scm_send(sock, msg, siocb->scm);
1583 	if (err < 0)
1584 		return err;
1585 
1586 	err = -EOPNOTSUPP;
1587 	if (msg->msg_flags&MSG_OOB)
1588 		goto out_err;
1589 
1590 	if (msg->msg_namelen) {
1591 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1592 		goto out_err;
1593 	} else {
1594 		err = -ENOTCONN;
1595 		other = unix_peer(sk);
1596 		if (!other)
1597 			goto out_err;
1598 	}
1599 
1600 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1601 		goto pipe_err;
1602 
1603 	while (sent < len) {
1604 		/*
1605 		 *	Optimisation for the fact that under 0.01% of X
1606 		 *	messages typically need breaking up.
1607 		 */
1608 
1609 		size = len-sent;
1610 
1611 		/* Keep two messages in the pipe so it schedules better */
1612 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1613 			size = (sk->sk_sndbuf >> 1) - 64;
1614 
1615 		if (size > SKB_MAX_ALLOC)
1616 			size = SKB_MAX_ALLOC;
1617 
1618 		/*
1619 		 *	Grab a buffer
1620 		 */
1621 
1622 		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1623 					  &err);
1624 
1625 		if (skb == NULL)
1626 			goto out_err;
1627 
1628 		/*
1629 		 *	If you pass two values to sock_alloc_send_skb
1630 		 *	it tries to grab the large buffer with GFP_NOFS
1631 		 *	(which can fail easily), and if that fails it grabs
1632 		 *	the fallback size buffer, which is under a page and
1633 		 *	will succeed. [Alan]
1634 		 */
1635 		size = min_t(int, size, skb_tailroom(skb));
1636 
1637 
1638 		/* Only send the fds in the first buffer */
1639 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1640 		if (err < 0) {
1641 			kfree_skb(skb);
1642 			goto out_err;
1643 		}
1644 		max_level = err + 1;
1645 		fds_sent = true;
1646 
1647 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1648 		if (err) {
1649 			kfree_skb(skb);
1650 			goto out_err;
1651 		}
1652 
1653 		unix_state_lock(other);
1654 
1655 		if (sock_flag(other, SOCK_DEAD) ||
1656 		    (other->sk_shutdown & RCV_SHUTDOWN))
1657 			goto pipe_err_free;
1658 
1659 		skb_queue_tail(&other->sk_receive_queue, skb);
1660 		if (max_level > unix_sk(other)->recursion_level)
1661 			unix_sk(other)->recursion_level = max_level;
1662 		unix_state_unlock(other);
1663 		other->sk_data_ready(other, size);
1664 		sent += size;
1665 	}
1666 
1667 	scm_destroy(siocb->scm);
1668 	siocb->scm = NULL;
1669 
1670 	return sent;
1671 
1672 pipe_err_free:
1673 	unix_state_unlock(other);
1674 	kfree_skb(skb);
1675 pipe_err:
1676 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1677 		send_sig(SIGPIPE, current, 0);
1678 	err = -EPIPE;
1679 out_err:
1680 	scm_destroy(siocb->scm);
1681 	siocb->scm = NULL;
1682 	return sent ? : err;
1683 }
1684 
1685 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1686 				  struct msghdr *msg, size_t len)
1687 {
1688 	int err;
1689 	struct sock *sk = sock->sk;
1690 
1691 	err = sock_error(sk);
1692 	if (err)
1693 		return err;
1694 
1695 	if (sk->sk_state != TCP_ESTABLISHED)
1696 		return -ENOTCONN;
1697 
1698 	if (msg->msg_namelen)
1699 		msg->msg_namelen = 0;
1700 
1701 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1702 }
1703 
1704 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1705 			      struct msghdr *msg, size_t size,
1706 			      int flags)
1707 {
1708 	struct sock *sk = sock->sk;
1709 
1710 	if (sk->sk_state != TCP_ESTABLISHED)
1711 		return -ENOTCONN;
1712 
1713 	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1714 }
1715 
1716 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1717 {
1718 	struct unix_sock *u = unix_sk(sk);
1719 
1720 	msg->msg_namelen = 0;
1721 	if (u->addr) {
1722 		msg->msg_namelen = u->addr->len;
1723 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1724 	}
1725 }
1726 
1727 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1728 			      struct msghdr *msg, size_t size,
1729 			      int flags)
1730 {
1731 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1732 	struct scm_cookie tmp_scm;
1733 	struct sock *sk = sock->sk;
1734 	struct unix_sock *u = unix_sk(sk);
1735 	int noblock = flags & MSG_DONTWAIT;
1736 	struct sk_buff *skb;
1737 	int err;
1738 
1739 	err = -EOPNOTSUPP;
1740 	if (flags&MSG_OOB)
1741 		goto out;
1742 
1743 	msg->msg_namelen = 0;
1744 
1745 	err = mutex_lock_interruptible(&u->readlock);
1746 	if (err) {
1747 		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1748 		goto out;
1749 	}
1750 
1751 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1752 	if (!skb) {
1753 		unix_state_lock(sk);
1754 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1755 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1756 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1757 			err = 0;
1758 		unix_state_unlock(sk);
1759 		goto out_unlock;
1760 	}
1761 
1762 	wake_up_interruptible_sync_poll(&u->peer_wait,
1763 					POLLOUT | POLLWRNORM | POLLWRBAND);
1764 
1765 	if (msg->msg_name)
1766 		unix_copy_addr(msg, skb->sk);
1767 
1768 	if (size > skb->len)
1769 		size = skb->len;
1770 	else if (size < skb->len)
1771 		msg->msg_flags |= MSG_TRUNC;
1772 
1773 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1774 	if (err)
1775 		goto out_free;
1776 
1777 	if (sock_flag(sk, SOCK_RCVTSTAMP))
1778 		__sock_recv_timestamp(msg, sk, skb);
1779 
1780 	if (!siocb->scm) {
1781 		siocb->scm = &tmp_scm;
1782 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1783 	}
1784 	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1785 	unix_set_secdata(siocb->scm, skb);
1786 
1787 	if (!(flags & MSG_PEEK)) {
1788 		if (UNIXCB(skb).fp)
1789 			unix_detach_fds(siocb->scm, skb);
1790 	} else {
1791 		/* It is questionable: on PEEK we could:
1792 		   - not return fds - good, but too simple 8)
1793 		   - return fds, but not return them on read (old strategy,
1794 		     apparently wrong)
1795 		   - clone fds (I chose it for now, it is the most universal
1796 		     solution)
1797 
1798 		   POSIX 1003.1g does not actually define this clearly
1799 		   at all. POSIX 1003.1g doesn't define a lot of things
1800 		   clearly however!
1801 
1802 		*/
1803 		if (UNIXCB(skb).fp)
1804 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1805 	}
1806 	err = size;
1807 
1808 	scm_recv(sock, msg, siocb->scm, flags);
1809 
1810 out_free:
1811 	skb_free_datagram(sk, skb);
1812 out_unlock:
1813 	mutex_unlock(&u->readlock);
1814 out:
1815 	return err;
1816 }
1817 
1818 /*
1819  *	Sleep until data has arrived. But check for races.
1820  */
1821 
1822 static long unix_stream_data_wait(struct sock *sk, long timeo)
1823 {
1824 	DEFINE_WAIT(wait);
1825 
1826 	unix_state_lock(sk);
1827 
1828 	for (;;) {
1829 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1830 
1831 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1832 		    sk->sk_err ||
1833 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1834 		    signal_pending(current) ||
1835 		    !timeo)
1836 			break;
1837 
1838 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1839 		unix_state_unlock(sk);
1840 		timeo = schedule_timeout(timeo);
1841 		unix_state_lock(sk);
1842 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1843 	}
1844 
1845 	finish_wait(sk_sleep(sk), &wait);
1846 	unix_state_unlock(sk);
1847 	return timeo;
1848 }
1849 
1850 
1851 
1852 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1853 			       struct msghdr *msg, size_t size,
1854 			       int flags)
1855 {
1856 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1857 	struct scm_cookie tmp_scm;
1858 	struct sock *sk = sock->sk;
1859 	struct unix_sock *u = unix_sk(sk);
1860 	struct sockaddr_un *sunaddr = msg->msg_name;
1861 	int copied = 0;
1862 	int check_creds = 0;
1863 	int target;
1864 	int err = 0;
1865 	long timeo;
1866 
1867 	err = -EINVAL;
1868 	if (sk->sk_state != TCP_ESTABLISHED)
1869 		goto out;
1870 
1871 	err = -EOPNOTSUPP;
1872 	if (flags&MSG_OOB)
1873 		goto out;
1874 
1875 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1876 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1877 
1878 	msg->msg_namelen = 0;
1879 
1880 	/* Lock the socket to prevent queue disordering
1881 	 * while we sleep in memcpy_toiovec
1882 	 */
1883 
1884 	if (!siocb->scm) {
1885 		siocb->scm = &tmp_scm;
1886 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1887 	}
1888 
1889 	err = mutex_lock_interruptible(&u->readlock);
1890 	if (err) {
1891 		err = sock_intr_errno(timeo);
1892 		goto out;
1893 	}
1894 
1895 	do {
1896 		int chunk;
1897 		struct sk_buff *skb;
1898 
1899 		unix_state_lock(sk);
1900 		skb = skb_dequeue(&sk->sk_receive_queue);
1901 		if (skb == NULL) {
1902 			unix_sk(sk)->recursion_level = 0;
1903 			if (copied >= target)
1904 				goto unlock;
1905 
1906 			/*
1907 			 *	POSIX 1003.1g mandates this order.
1908 			 */
1909 
1910 			err = sock_error(sk);
1911 			if (err)
1912 				goto unlock;
1913 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1914 				goto unlock;
1915 
1916 			unix_state_unlock(sk);
1917 			err = -EAGAIN;
1918 			if (!timeo)
1919 				break;
1920 			mutex_unlock(&u->readlock);
1921 
1922 			timeo = unix_stream_data_wait(sk, timeo);
1923 
1924 			if (signal_pending(current)
1925 			    ||  mutex_lock_interruptible(&u->readlock)) {
1926 				err = sock_intr_errno(timeo);
1927 				goto out;
1928 			}
1929 
1930 			continue;
1931  unlock:
1932 			unix_state_unlock(sk);
1933 			break;
1934 		}
1935 		unix_state_unlock(sk);
1936 
1937 		if (check_creds) {
1938 			/* Never glue messages from different writers */
1939 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
1940 			    (UNIXCB(skb).cred != siocb->scm->cred)) {
1941 				skb_queue_head(&sk->sk_receive_queue, skb);
1942 				break;
1943 			}
1944 		} else {
1945 			/* Copy credentials */
1946 			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1947 			check_creds = 1;
1948 		}
1949 
1950 		/* Copy address just once */
1951 		if (sunaddr) {
1952 			unix_copy_addr(msg, skb->sk);
1953 			sunaddr = NULL;
1954 		}
1955 
1956 		chunk = min_t(unsigned int, skb->len, size);
1957 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1958 			skb_queue_head(&sk->sk_receive_queue, skb);
1959 			if (copied == 0)
1960 				copied = -EFAULT;
1961 			break;
1962 		}
1963 		copied += chunk;
1964 		size -= chunk;
1965 
1966 		/* Mark read part of skb as used */
1967 		if (!(flags & MSG_PEEK)) {
1968 			skb_pull(skb, chunk);
1969 
1970 			if (UNIXCB(skb).fp)
1971 				unix_detach_fds(siocb->scm, skb);
1972 
1973 			/* put the skb back if we didn't use it up.. */
1974 			if (skb->len) {
1975 				skb_queue_head(&sk->sk_receive_queue, skb);
1976 				break;
1977 			}
1978 
1979 			consume_skb(skb);
1980 
1981 			if (siocb->scm->fp)
1982 				break;
1983 		} else {
1984 			/* It is questionable, see note in unix_dgram_recvmsg.
1985 			 */
1986 			if (UNIXCB(skb).fp)
1987 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1988 
1989 			/* put message back and return */
1990 			skb_queue_head(&sk->sk_receive_queue, skb);
1991 			break;
1992 		}
1993 	} while (size);
1994 
1995 	mutex_unlock(&u->readlock);
1996 	scm_recv(sock, msg, siocb->scm, flags);
1997 out:
1998 	return copied ? : err;
1999 }
2000 
2001 static int unix_shutdown(struct socket *sock, int mode)
2002 {
2003 	struct sock *sk = sock->sk;
2004 	struct sock *other;
2005 
2006 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2007 
2008 	if (!mode)
2009 		return 0;
2010 
2011 	unix_state_lock(sk);
2012 	sk->sk_shutdown |= mode;
2013 	other = unix_peer(sk);
2014 	if (other)
2015 		sock_hold(other);
2016 	unix_state_unlock(sk);
2017 	sk->sk_state_change(sk);
2018 
2019 	if (other &&
2020 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2021 
2022 		int peer_mode = 0;
2023 
2024 		if (mode&RCV_SHUTDOWN)
2025 			peer_mode |= SEND_SHUTDOWN;
2026 		if (mode&SEND_SHUTDOWN)
2027 			peer_mode |= RCV_SHUTDOWN;
2028 		unix_state_lock(other);
2029 		other->sk_shutdown |= peer_mode;
2030 		unix_state_unlock(other);
2031 		other->sk_state_change(other);
2032 		if (peer_mode == SHUTDOWN_MASK)
2033 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2034 		else if (peer_mode & RCV_SHUTDOWN)
2035 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2036 	}
2037 	if (other)
2038 		sock_put(other);
2039 
2040 	return 0;
2041 }
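
/*
 * Example (userspace, illustrative sketch): the half-close handled above.
 * shutdown(SHUT_WR) on one end sets SEND_SHUTDOWN locally and RCV_SHUTDOWN
 * on the peer, so the peer reads EOF once the queue drains, while data can
 * still flow in the other direction.
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int sv[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);
 *	read(sv[1], &c, 1);	returns 0: sv[1] saw RCV_SHUTDOWN
 *	write(sv[1], "x", 1);	still succeeds: the other direction is open
 */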
2042 
2043 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2044 {
2045 	struct sock *sk = sock->sk;
2046 	long amount = 0;
2047 	int err;
2048 
2049 	switch (cmd) {
2050 	case SIOCOUTQ:
2051 		amount = sk_wmem_alloc_get(sk);
2052 		err = put_user(amount, (int __user *)arg);
2053 		break;
2054 	case SIOCINQ:
2055 		{
2056 			struct sk_buff *skb;
2057 
2058 			if (sk->sk_state == TCP_LISTEN) {
2059 				err = -EINVAL;
2060 				break;
2061 			}
2062 
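			/*
			 * For stream and seqpacket sockets SIOCINQ reports
			 * the total number of queued bytes; for datagram
			 * sockets it reports the size of the next packet.
			 */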
2063 			spin_lock(&sk->sk_receive_queue.lock);
2064 			if (sk->sk_type == SOCK_STREAM ||
2065 			    sk->sk_type == SOCK_SEQPACKET) {
2066 				skb_queue_walk(&sk->sk_receive_queue, skb)
2067 					amount += skb->len;
2068 			} else {
2069 				skb = skb_peek(&sk->sk_receive_queue);
2070 				if (skb)
2071 					amount = skb->len;
2072 			}
2073 			spin_unlock(&sk->sk_receive_queue.lock);
2074 			err = put_user(amount, (int __user *)arg);
2075 			break;
2076 		}
2077 
2078 	default:
2079 		err = -ENOIOCTLCMD;
2080 		break;
2081 	}
2082 	return err;
2083 }
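
/*
 * Example (userspace, illustrative sketch): querying the queue sizes
 * computed above. SIOCINQ and SIOCOUTQ come from <linux/sockios.h>
 * (they alias FIONREAD and TIOCOUTQ).
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *	#include <stdio.h>
 *
 *	int inq, outq;
 *
 *	if (!ioctl(fd, SIOCINQ, &inq) && !ioctl(fd, SIOCOUTQ, &outq))
 *		printf("unread %d bytes, unsent %d bytes\n", inq, outq);
 */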
2084 
2085 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2086 {
2087 	struct sock *sk = sock->sk;
2088 	unsigned int mask;
2089 
2090 	sock_poll_wait(file, sk_sleep(sk), wait);
2091 	mask = 0;
2092 
2093 	/* exceptional events? */
2094 	if (sk->sk_err)
2095 		mask |= POLLERR;
2096 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2097 		mask |= POLLHUP;
2098 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2099 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2100 
2101 	/* readable? */
2102 	if (!skb_queue_empty(&sk->sk_receive_queue))
2103 		mask |= POLLIN | POLLRDNORM;
2104 
2105 	/* Connection-based sockets need to check for termination and startup */
2106 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2107 	    sk->sk_state == TCP_CLOSE)
2108 		mask |= POLLHUP;
2109 
2110 	/*
2111 	 * We also report the socket as writable when the other side has
2112 	 * shut down the connection; this prevents writers from getting stuck.
2113 	 */
2114 	if (unix_writable(sk))
2115 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2116 
2117 	return mask;
2118 }
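
/*
 * Example (userspace, illustrative sketch): observing the mask built
 * above. After the peer calls shutdown(SHUT_WR), poll() reports POLLRDHUP
 * (when requested) together with POLLIN even though no data is queued;
 * POLLHUP appears only once both directions are shut down.
 *
 *	#define _GNU_SOURCE	(needed for POLLRDHUP)
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLRDHUP))
 *		handle_peer_half_close();	(hypothetical handler)
 */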
2119 
2120 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2121 				    poll_table *wait)
2122 {
2123 	struct sock *sk = sock->sk, *other;
2124 	unsigned int mask, writable;
2125 
2126 	sock_poll_wait(file, sk_sleep(sk), wait);
2127 	mask = 0;
2128 
2129 	/* exceptional events? */
2130 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2131 		mask |= POLLERR;
2132 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2133 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2134 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2135 		mask |= POLLHUP;
2136 
2137 	/* readable? */
2138 	if (!skb_queue_empty(&sk->sk_receive_queue))
2139 		mask |= POLLIN | POLLRDNORM;
2140 
2141 	/* Connection-based sockets need to check for termination and startup */
2142 	if (sk->sk_type == SOCK_SEQPACKET) {
2143 		if (sk->sk_state == TCP_CLOSE)
2144 			mask |= POLLHUP;
2145 		/* connection hasn't started yet? */
2146 		if (sk->sk_state == TCP_SYN_SENT)
2147 			return mask;
2148 	}
2149 
2150 	/* No write status requested, avoid expensive OUT tests. */
2151 	if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2152 		return mask;
2153 
2154 	writable = unix_writable(sk);
2155 	other = unix_peer_get(sk);
2156 	if (other) {
2157 		if (unix_peer(other) != sk) {
2158 			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2159 			if (unix_recvq_full(other))
2160 				writable = 0;
2161 		}
2162 		sock_put(other);
2163 	}
2164 
2165 	if (writable)
2166 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2167 	else
2168 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2169 
2170 	return mask;
2171 }
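
/*
 * Example (userspace, illustrative sketch): the writability check above
 * also consults the connected peer's receive queue, so a connected
 * SOCK_DGRAM sender can use poll(POLLOUT) for backpressure instead of
 * spinning on EAGAIN while the receiver is slow.
 *
 *	#include <poll.h>
 *	#include <errno.h>
 *	#include <sys/socket.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	while (send(fd, buf, len, MSG_DONTWAIT) < 0 && errno == EAGAIN)
 *		poll(&pfd, 1, -1);	wait until the peer drains
 */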
2172 
2173 #ifdef CONFIG_PROC_FS
2174 static struct sock *first_unix_socket(int *i)
2175 {
2176 	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2177 		if (!hlist_empty(&unix_socket_table[*i]))
2178 			return __sk_head(&unix_socket_table[*i]);
2179 	}
2180 	return NULL;
2181 }
2182 
2183 static struct sock *next_unix_socket(int *i, struct sock *s)
2184 {
2185 	struct sock *next = sk_next(s);
2186 	/* More in this chain? */
2187 	if (next)
2188 		return next;
2189 	/* Look for next non-empty chain. */
2190 	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2191 		if (!hlist_empty(&unix_socket_table[*i]))
2192 			return __sk_head(&unix_socket_table[*i]);
2193 	}
2194 	return NULL;
2195 }
2196 
2197 struct unix_iter_state {
2198 	struct seq_net_private p;
2199 	int i;
2200 };
2201 
2202 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2203 {
2204 	struct unix_iter_state *iter = seq->private;
2205 	loff_t off = 0;
2206 	struct sock *s;
2207 
2208 	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2209 		if (sock_net(s) != seq_file_net(seq))
2210 			continue;
2211 		if (off == pos)
2212 			return s;
2213 		++off;
2214 	}
2215 	return NULL;
2216 }
2217 
2218 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2219 	__acquires(unix_table_lock)
2220 {
2221 	spin_lock(&unix_table_lock);
2222 	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2223 }
2224 
2225 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2226 {
2227 	struct unix_iter_state *iter = seq->private;
2228 	struct sock *sk = v;
2229 	++*pos;
2230 
2231 	if (v == SEQ_START_TOKEN)
2232 		sk = first_unix_socket(&iter->i);
2233 	else
2234 		sk = next_unix_socket(&iter->i, sk);
2235 	while (sk && (sock_net(sk) != seq_file_net(seq)))
2236 		sk = next_unix_socket(&iter->i, sk);
2237 	return sk;
2238 }
2239 
2240 static void unix_seq_stop(struct seq_file *seq, void *v)
2241 	__releases(unix_table_lock)
2242 {
2243 	spin_unlock(&unix_table_lock);
2244 }
2245 
2246 static int unix_seq_show(struct seq_file *seq, void *v)
2247 {
2248 
2249 	if (v == SEQ_START_TOKEN)
2250 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2251 			 "Inode Path\n");
2252 	else {
2253 		struct sock *s = v;
2254 		struct unix_sock *u = unix_sk(s);
2255 		unix_state_lock(s);
2256 
2257 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2258 			s,
2259 			atomic_read(&s->sk_refcnt),
2260 			0,
2261 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2262 			s->sk_type,
2263 			s->sk_socket ?
2264 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2265 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2266 			sock_i_ino(s));
2267 
2268 		if (u->addr) {
2269 			int i, len;
2270 			seq_putc(seq, ' ');
2271 
2272 			i = 0;
2273 			len = u->addr->len - sizeof(short);
2274 			if (!UNIX_ABSTRACT(s))
2275 				len--;
2276 			else {
2277 				seq_putc(seq, '@');
2278 				i++;
2279 			}
2280 			for ( ; i < len; i++)
2281 				seq_putc(seq, u->addr->name->sun_path[i]);
2282 		}
2283 		unix_state_unlock(s);
2284 		seq_putc(seq, '\n');
2285 	}
2286 
2287 	return 0;
2288 }
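
/*
 * The lines produced above appear in /proc/net/unix, e.g. (values
 * illustrative only):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff88003d4bb400: 00000002 00000000 00010000 0001 01 21382 /run/foo.sock
 *
 * Flags shows __SO_ACCEPTCON for listeners, Type the SOCK_* value,
 * St the SS_* state, and abstract names are printed with a leading '@'.
 */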
2289 
2290 static const struct seq_operations unix_seq_ops = {
2291 	.start  = unix_seq_start,
2292 	.next   = unix_seq_next,
2293 	.stop   = unix_seq_stop,
2294 	.show   = unix_seq_show,
2295 };
2296 
2297 static int unix_seq_open(struct inode *inode, struct file *file)
2298 {
2299 	return seq_open_net(inode, file, &unix_seq_ops,
2300 			    sizeof(struct unix_iter_state));
2301 }
2302 
2303 static const struct file_operations unix_seq_fops = {
2304 	.owner		= THIS_MODULE,
2305 	.open		= unix_seq_open,
2306 	.read		= seq_read,
2307 	.llseek		= seq_lseek,
2308 	.release	= seq_release_net,
2309 };
2310 
2311 #endif
2312 
2313 static const struct net_proto_family unix_family_ops = {
2314 	.family = PF_UNIX,
2315 	.create = unix_create,
2316 	.owner	= THIS_MODULE,
2317 };
2318 
2319 
2320 static int __net_init unix_net_init(struct net *net)
2321 {
2322 	int error = -ENOMEM;
2323 
2324 	net->unx.sysctl_max_dgram_qlen = 10;
2325 	if (unix_sysctl_register(net))
2326 		goto out;
2327 
2328 #ifdef CONFIG_PROC_FS
2329 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2330 		unix_sysctl_unregister(net);
2331 		goto out;
2332 	}
2333 #endif
2334 	error = 0;
2335 out:
2336 	return error;
2337 }
2338 
2339 static void __net_exit unix_net_exit(struct net *net)
2340 {
2341 	unix_sysctl_unregister(net);
2342 	proc_net_remove(net, "unix");
2343 }
2344 
2345 static struct pernet_operations unix_net_ops = {
2346 	.init = unix_net_init,
2347 	.exit = unix_net_exit,
2348 };
2349 
2350 static int __init af_unix_init(void)
2351 {
2352 	int rc = -1;
2353 	struct sk_buff *dummy_skb;
2354 
2355 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2356 
2357 	rc = proto_register(&unix_proto, 1);
2358 	if (rc != 0) {
2359 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2360 		       __func__);
2361 		goto out;
2362 	}
2363 
2364 	sock_register(&unix_family_ops);
2365 	register_pernet_subsys(&unix_net_ops);
2366 out:
2367 	return rc;
2368 }
2369 
2370 static void __exit af_unix_exit(void)
2371 {
2372 	sock_unregister(PF_UNIX);
2373 	proto_unregister(&unix_proto);
2374 	unregister_pernet_subsys(&unix_net_ops);
2375 }
2376 
2377 /* Earlier than device_initcall() so that other drivers invoking
2378    request_module() don't end up in a loop when modprobe tries
2379    to use a UNIX socket. But later than subsys_initcall() because
2380    we depend on infrastructure initialised there. */
2381 fs_initcall(af_unix_init);
2382 module_exit(af_unix_exit);
2383 
2384 MODULE_LICENSE("GPL");
2385 MODULE_ALIAS_NETPROTO(PF_UNIX);
2386