xref: /linux/net/unix/garbage.c (revision 4a3e2f711a00a1feb72ae12fdc749da10179d185)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * NET3:	Garbage Collector For AF_UNIX sockets
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Garbage Collector:
51da177e4SLinus Torvalds  *	Copyright (C) Barak A. Pearlmutter.
61da177e4SLinus Torvalds  *	Released under the GPL version 2 or later.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
91da177e4SLinus Torvalds  * If it doesn't work blame me, it worked when Barak sent it.
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  * Assumptions:
121da177e4SLinus Torvalds  *
131da177e4SLinus Torvalds  *  - object w/ a bit
141da177e4SLinus Torvalds  *  - free list
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  * Current optimizations:
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *  - explicit stack instead of recursion
191da177e4SLinus Torvalds  *  - tail recurse on first born instead of immediate push/pop
201da177e4SLinus Torvalds  *  - we gather the stuff that should not be killed into tree
211da177e4SLinus Torvalds  *    and stack is just a path from root to the current pointer.
221da177e4SLinus Torvalds  *
231da177e4SLinus Torvalds  *  Future optimizations:
241da177e4SLinus Torvalds  *
251da177e4SLinus Torvalds  *  - don't just push entire root set; process in place
261da177e4SLinus Torvalds  *
271da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
281da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
291da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
301da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
311da177e4SLinus Torvalds  *
321da177e4SLinus Torvalds  *  Fixes:
331da177e4SLinus Torvalds  *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
341da177e4SLinus Torvalds  *					Cope with changing max_files.
351da177e4SLinus Torvalds  *	Al Viro		11 Oct 1998
361da177e4SLinus Torvalds  *		Graph may have cycles. That is, we can send the descriptor
371da177e4SLinus Torvalds  *		of foo to bar and vice versa. Current code chokes on that.
381da177e4SLinus Torvalds  *		Fix: move SCM_RIGHTS ones into the separate list and then
391da177e4SLinus Torvalds  *		skb_free() them all instead of doing explicit fput's.
401da177e4SLinus Torvalds  *		Another problem: since fput() may block somebody may
411da177e4SLinus Torvalds  *		create a new unix_socket when we are in the middle of sweep
421da177e4SLinus Torvalds  *		phase. Fix: revert the logic wrt MARKED. Mark everything
431da177e4SLinus Torvalds  *		upon the beginning and unmark non-junk ones.
441da177e4SLinus Torvalds  *
451da177e4SLinus Torvalds  *		[12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
461da177e4SLinus Torvalds  *		sent to connect()'ed but still not accept()'ed sockets.
471da177e4SLinus Torvalds  *		Fixed. Old code had slightly different problem here:
481da177e4SLinus Torvalds  *		extra fput() in situation when we passed the descriptor via
491da177e4SLinus Torvalds  *		such socket and closed it (descriptor). That would happen on
501da177e4SLinus Torvalds  *		each unix_gc() until the accept(). Since the struct file in
511da177e4SLinus Torvalds  *		question would go to the free list and might be reused...
521da177e4SLinus Torvalds  *		That might be the reason of random oopses on filp_close()
531da177e4SLinus Torvalds  *		in unrelated processes.
541da177e4SLinus Torvalds  *
551da177e4SLinus Torvalds  *	AV		28 Feb 1999
561da177e4SLinus Torvalds  *		Kill the explicit allocation of stack. Now we keep the tree
571da177e4SLinus Torvalds  *		with root in dummy + pointer (gc_current) to one of the nodes.
581da177e4SLinus Torvalds  *		Stack is represented as path from gc_current to dummy. Unmark
591da177e4SLinus Torvalds  *		now means "add to tree". Push == "make it a son of gc_current".
601da177e4SLinus Torvalds  *		Pop == "move gc_current to parent". We keep only pointers to
611da177e4SLinus Torvalds  *		parents (->gc_tree).
621da177e4SLinus Torvalds  *	AV		1 Mar 1999
631da177e4SLinus Torvalds  *		Damn. Added missing check for ->dead in listen queues scanning.
641da177e4SLinus Torvalds  *
651da177e4SLinus Torvalds  */
661da177e4SLinus Torvalds 
671da177e4SLinus Torvalds #include <linux/kernel.h>
681da177e4SLinus Torvalds #include <linux/sched.h>
691da177e4SLinus Torvalds #include <linux/string.h>
701da177e4SLinus Torvalds #include <linux/socket.h>
711da177e4SLinus Torvalds #include <linux/un.h>
721da177e4SLinus Torvalds #include <linux/net.h>
731da177e4SLinus Torvalds #include <linux/fs.h>
741da177e4SLinus Torvalds #include <linux/slab.h>
751da177e4SLinus Torvalds #include <linux/skbuff.h>
761da177e4SLinus Torvalds #include <linux/netdevice.h>
771da177e4SLinus Torvalds #include <linux/file.h>
781da177e4SLinus Torvalds #include <linux/proc_fs.h>
79*4a3e2f71SArjan van de Ven #include <linux/mutex.h>
801da177e4SLinus Torvalds 
811da177e4SLinus Torvalds #include <net/sock.h>
821da177e4SLinus Torvalds #include <net/af_unix.h>
831da177e4SLinus Torvalds #include <net/scm.h>
84c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h>
851da177e4SLinus Torvalds 
861da177e4SLinus Torvalds /* Internal data structures and random procedures: */
871da177e4SLinus Torvalds 
881da177e4SLinus Torvalds #define GC_HEAD		((struct sock *)(-1))
891da177e4SLinus Torvalds #define GC_ORPHAN	((struct sock *)(-3))
901da177e4SLinus Torvalds 
911da177e4SLinus Torvalds static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */
921da177e4SLinus Torvalds 
931da177e4SLinus Torvalds atomic_t unix_tot_inflight = ATOMIC_INIT(0);
941da177e4SLinus Torvalds 
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds static struct sock *unix_get_socket(struct file *filp)
971da177e4SLinus Torvalds {
981da177e4SLinus Torvalds 	struct sock *u_sock = NULL;
991da177e4SLinus Torvalds 	struct inode *inode = filp->f_dentry->d_inode;
1001da177e4SLinus Torvalds 
1011da177e4SLinus Torvalds 	/*
1021da177e4SLinus Torvalds 	 *	Socket ?
1031da177e4SLinus Torvalds 	 */
1041da177e4SLinus Torvalds 	if (S_ISSOCK(inode->i_mode)) {
1051da177e4SLinus Torvalds 		struct socket * sock = SOCKET_I(inode);
1061da177e4SLinus Torvalds 		struct sock * s = sock->sk;
1071da177e4SLinus Torvalds 
1081da177e4SLinus Torvalds 		/*
1091da177e4SLinus Torvalds 		 *	PF_UNIX ?
1101da177e4SLinus Torvalds 		 */
1111da177e4SLinus Torvalds 		if (s && sock->ops && sock->ops->family == PF_UNIX)
1121da177e4SLinus Torvalds 			u_sock = s;
1131da177e4SLinus Torvalds 	}
1141da177e4SLinus Torvalds 	return u_sock;
1151da177e4SLinus Torvalds }
1161da177e4SLinus Torvalds 
1171da177e4SLinus Torvalds /*
1181da177e4SLinus Torvalds  *	Keep the number of times in flight count for the file
1191da177e4SLinus Torvalds  *	descriptor if it is for an AF_UNIX socket.
1201da177e4SLinus Torvalds  */
1211da177e4SLinus Torvalds 
1221da177e4SLinus Torvalds void unix_inflight(struct file *fp)
1231da177e4SLinus Torvalds {
1241da177e4SLinus Torvalds 	struct sock *s = unix_get_socket(fp);
1251da177e4SLinus Torvalds 	if(s) {
1261da177e4SLinus Torvalds 		atomic_inc(&unix_sk(s)->inflight);
1271da177e4SLinus Torvalds 		atomic_inc(&unix_tot_inflight);
1281da177e4SLinus Torvalds 	}
1291da177e4SLinus Torvalds }
1301da177e4SLinus Torvalds 
1311da177e4SLinus Torvalds void unix_notinflight(struct file *fp)
1321da177e4SLinus Torvalds {
1331da177e4SLinus Torvalds 	struct sock *s = unix_get_socket(fp);
1341da177e4SLinus Torvalds 	if(s) {
1351da177e4SLinus Torvalds 		atomic_dec(&unix_sk(s)->inflight);
1361da177e4SLinus Torvalds 		atomic_dec(&unix_tot_inflight);
1371da177e4SLinus Torvalds 	}
1381da177e4SLinus Torvalds }
1391da177e4SLinus Torvalds 
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds /*
1421da177e4SLinus Torvalds  *	Garbage Collector Support Functions
1431da177e4SLinus Torvalds  */
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds static inline struct sock *pop_stack(void)
1461da177e4SLinus Torvalds {
1471da177e4SLinus Torvalds 	struct sock *p = gc_current;
1481da177e4SLinus Torvalds 	gc_current = unix_sk(p)->gc_tree;
1491da177e4SLinus Torvalds 	return p;
1501da177e4SLinus Torvalds }
1511da177e4SLinus Torvalds 
1521da177e4SLinus Torvalds static inline int empty_stack(void)
1531da177e4SLinus Torvalds {
1541da177e4SLinus Torvalds 	return gc_current == GC_HEAD;
1551da177e4SLinus Torvalds }
1561da177e4SLinus Torvalds 
1571da177e4SLinus Torvalds static void maybe_unmark_and_push(struct sock *x)
1581da177e4SLinus Torvalds {
1591da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(x);
1601da177e4SLinus Torvalds 
1611da177e4SLinus Torvalds 	if (u->gc_tree != GC_ORPHAN)
1621da177e4SLinus Torvalds 		return;
1631da177e4SLinus Torvalds 	sock_hold(x);
1641da177e4SLinus Torvalds 	u->gc_tree = gc_current;
1651da177e4SLinus Torvalds 	gc_current = x;
1661da177e4SLinus Torvalds }
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds 
1691da177e4SLinus Torvalds /* The external entry point: unix_gc() */
1701da177e4SLinus Torvalds 
1711da177e4SLinus Torvalds void unix_gc(void)
1721da177e4SLinus Torvalds {
173*4a3e2f71SArjan van de Ven 	static DEFINE_MUTEX(unix_gc_sem);
1741da177e4SLinus Torvalds 	int i;
1751da177e4SLinus Torvalds 	struct sock *s;
1761da177e4SLinus Torvalds 	struct sk_buff_head hitlist;
1771da177e4SLinus Torvalds 	struct sk_buff *skb;
1781da177e4SLinus Torvalds 
1791da177e4SLinus Torvalds 	/*
1801da177e4SLinus Torvalds 	 *	Avoid a recursive GC.
1811da177e4SLinus Torvalds 	 */
1821da177e4SLinus Torvalds 
183*4a3e2f71SArjan van de Ven 	if (!mutex_trylock(&unix_gc_sem))
1841da177e4SLinus Torvalds 		return;
1851da177e4SLinus Torvalds 
186fbe9cc4aSDavid S. Miller 	spin_lock(&unix_table_lock);
1871da177e4SLinus Torvalds 
1881da177e4SLinus Torvalds 	forall_unix_sockets(i, s)
1891da177e4SLinus Torvalds 	{
1901da177e4SLinus Torvalds 		unix_sk(s)->gc_tree = GC_ORPHAN;
1911da177e4SLinus Torvalds 	}
1921da177e4SLinus Torvalds 	/*
1931da177e4SLinus Torvalds 	 *	Everything is now marked
1941da177e4SLinus Torvalds 	 */
1951da177e4SLinus Torvalds 
1961da177e4SLinus Torvalds 	/* Invariant to be maintained:
1971da177e4SLinus Torvalds 		- everything unmarked is either:
1981da177e4SLinus Torvalds 		-- (a) on the stack, or
1991da177e4SLinus Torvalds 		-- (b) has all of its children unmarked
2001da177e4SLinus Torvalds 		- everything on the stack is always unmarked
2011da177e4SLinus Torvalds 		- nothing is ever pushed onto the stack twice, because:
2021da177e4SLinus Torvalds 		-- nothing previously unmarked is ever pushed on the stack
2031da177e4SLinus Torvalds 	 */
2041da177e4SLinus Torvalds 
2051da177e4SLinus Torvalds 	/*
2061da177e4SLinus Torvalds 	 *	Push root set
2071da177e4SLinus Torvalds 	 */
2081da177e4SLinus Torvalds 
2091da177e4SLinus Torvalds 	forall_unix_sockets(i, s)
2101da177e4SLinus Torvalds 	{
2111da177e4SLinus Torvalds 		int open_count = 0;
2121da177e4SLinus Torvalds 
2131da177e4SLinus Torvalds 		/*
2141da177e4SLinus Torvalds 		 *	If all instances of the descriptor are not
2151da177e4SLinus Torvalds 		 *	in flight we are in use.
2161da177e4SLinus Torvalds 		 *
2171da177e4SLinus Torvalds 		 *	Special case: when socket s is embrion, it may be
2181da177e4SLinus Torvalds 		 *	hashed but still not in queue of listening socket.
2191da177e4SLinus Torvalds 		 *	In this case (see unix_create1()) we set artificial
2201da177e4SLinus Torvalds 		 *	negative inflight counter to close race window.
2211da177e4SLinus Torvalds 		 *	It is trick of course and dirty one.
2221da177e4SLinus Torvalds 		 */
2231da177e4SLinus Torvalds 		if (s->sk_socket && s->sk_socket->file)
2241da177e4SLinus Torvalds 			open_count = file_count(s->sk_socket->file);
2251da177e4SLinus Torvalds 		if (open_count > atomic_read(&unix_sk(s)->inflight))
2261da177e4SLinus Torvalds 			maybe_unmark_and_push(s);
2271da177e4SLinus Torvalds 	}
2281da177e4SLinus Torvalds 
2291da177e4SLinus Torvalds 	/*
2301da177e4SLinus Torvalds 	 *	Mark phase
2311da177e4SLinus Torvalds 	 */
2321da177e4SLinus Torvalds 
2331da177e4SLinus Torvalds 	while (!empty_stack())
2341da177e4SLinus Torvalds 	{
2351da177e4SLinus Torvalds 		struct sock *x = pop_stack();
2361da177e4SLinus Torvalds 		struct sock *sk;
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds 		spin_lock(&x->sk_receive_queue.lock);
2391da177e4SLinus Torvalds 		skb = skb_peek(&x->sk_receive_queue);
2401da177e4SLinus Torvalds 
2411da177e4SLinus Torvalds 		/*
2421da177e4SLinus Torvalds 		 *	Loop through all but first born
2431da177e4SLinus Torvalds 		 */
2441da177e4SLinus Torvalds 
2451da177e4SLinus Torvalds 		while (skb && skb != (struct sk_buff *)&x->sk_receive_queue) {
2461da177e4SLinus Torvalds 			/*
2471da177e4SLinus Torvalds 			 *	Do we have file descriptors ?
2481da177e4SLinus Torvalds 			 */
2491da177e4SLinus Torvalds 			if(UNIXCB(skb).fp)
2501da177e4SLinus Torvalds 			{
2511da177e4SLinus Torvalds 				/*
2521da177e4SLinus Torvalds 				 *	Process the descriptors of this socket
2531da177e4SLinus Torvalds 				 */
2541da177e4SLinus Torvalds 				int nfd=UNIXCB(skb).fp->count;
2551da177e4SLinus Torvalds 				struct file **fp = UNIXCB(skb).fp->fp;
2561da177e4SLinus Torvalds 				while(nfd--)
2571da177e4SLinus Torvalds 				{
2581da177e4SLinus Torvalds 					/*
2591da177e4SLinus Torvalds 					 *	Get the socket the fd matches if
2601da177e4SLinus Torvalds 					 *	it indeed does so
2611da177e4SLinus Torvalds 					 */
2621da177e4SLinus Torvalds 					if((sk=unix_get_socket(*fp++))!=NULL)
2631da177e4SLinus Torvalds 					{
2641da177e4SLinus Torvalds 						maybe_unmark_and_push(sk);
2651da177e4SLinus Torvalds 					}
2661da177e4SLinus Torvalds 				}
2671da177e4SLinus Torvalds 			}
2681da177e4SLinus Torvalds 			/* We have to scan not-yet-accepted ones too */
2691da177e4SLinus Torvalds 			if (x->sk_state == TCP_LISTEN)
2701da177e4SLinus Torvalds 				maybe_unmark_and_push(skb->sk);
2711da177e4SLinus Torvalds 			skb=skb->next;
2721da177e4SLinus Torvalds 		}
2731da177e4SLinus Torvalds 		spin_unlock(&x->sk_receive_queue.lock);
2741da177e4SLinus Torvalds 		sock_put(x);
2751da177e4SLinus Torvalds 	}
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds 	skb_queue_head_init(&hitlist);
2781da177e4SLinus Torvalds 
2791da177e4SLinus Torvalds 	forall_unix_sockets(i, s)
2801da177e4SLinus Torvalds 	{
2811da177e4SLinus Torvalds 		struct unix_sock *u = unix_sk(s);
2821da177e4SLinus Torvalds 
2831da177e4SLinus Torvalds 		if (u->gc_tree == GC_ORPHAN) {
2841da177e4SLinus Torvalds 			struct sk_buff *nextsk;
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds 			spin_lock(&s->sk_receive_queue.lock);
2871da177e4SLinus Torvalds 			skb = skb_peek(&s->sk_receive_queue);
2881da177e4SLinus Torvalds 			while (skb &&
2891da177e4SLinus Torvalds 			       skb != (struct sk_buff *)&s->sk_receive_queue) {
2901da177e4SLinus Torvalds 				nextsk = skb->next;
2911da177e4SLinus Torvalds 				/*
2921da177e4SLinus Torvalds 				 *	Do we have file descriptors ?
2931da177e4SLinus Torvalds 				 */
2948728b834SDavid S. Miller 				if (UNIXCB(skb).fp) {
2958728b834SDavid S. Miller 					__skb_unlink(skb,
2968728b834SDavid S. Miller 						     &s->sk_receive_queue);
2971da177e4SLinus Torvalds 					__skb_queue_tail(&hitlist, skb);
2981da177e4SLinus Torvalds 				}
2991da177e4SLinus Torvalds 				skb = nextsk;
3001da177e4SLinus Torvalds 			}
3011da177e4SLinus Torvalds 			spin_unlock(&s->sk_receive_queue.lock);
3021da177e4SLinus Torvalds 		}
3031da177e4SLinus Torvalds 		u->gc_tree = GC_ORPHAN;
3041da177e4SLinus Torvalds 	}
305fbe9cc4aSDavid S. Miller 	spin_unlock(&unix_table_lock);
3061da177e4SLinus Torvalds 
3071da177e4SLinus Torvalds 	/*
3081da177e4SLinus Torvalds 	 *	Here we are. Hitlist is filled. Die.
3091da177e4SLinus Torvalds 	 */
3101da177e4SLinus Torvalds 
3111da177e4SLinus Torvalds 	__skb_queue_purge(&hitlist);
312*4a3e2f71SArjan van de Ven 	mutex_unlock(&unix_gc_sem);
3131da177e4SLinus Torvalds }
314