// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET3:	Garbage Collector For AF_UNIX sockets
 *
 * Garbage Collector:
 *	Copyright (C) Barak A. Pearlmutter.
 *
 * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
 * If it doesn't work blame me, it worked when Barak sent it.
 *
 * Assumptions:
 *
 *  - object w/ a bit
 *  - free list
 *
 * Current optimizations:
 *
 *  - explicit stack instead of recursion
 *  - tail recurse on first born instead of immediate push/pop
 *  - we gather the stuff that should not be killed into a tree
 *    and stack is just a path from root to the current pointer.
 *
 *  Future optimizations:
 *
 *  - don't just push entire root set; process in place
 *
 *  Fixes:
 *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
 *					Cope with changing max_files.
 *	Al Viro		11 Oct 1998
 *		Graph may have cycles. That is, we can send the descriptor
 *		of foo to bar and vice versa. Current code chokes on that.
 *		Fix: move SCM_RIGHTS ones into the separate list and then
 *		skb_free() them all instead of doing explicit fput's.
 *		Another problem: since fput() may block somebody may
 *		create a new unix_socket when we are in the middle of sweep
 *		phase. Fix: revert the logic wrt MARKED. Mark everything
 *		upon the beginning and unmark non-junk ones.
 *
 *		[12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
 *		sent to connect()'ed but still not accept()'ed sockets.
 *		Fixed. Old code had slightly different problem here:
 *		extra fput() in situation when we passed the descriptor via
 *		such socket and closed it (descriptor). That would happen on
 *		each unix_gc() until the accept(). Since the struct file in
 *		question would go to the free list and might be reused...
 *		That might be the reason for random oopses on filp_close()
 *		in unrelated processes.
 *
 *	AV		28 Feb 1999
 *		Kill the explicit allocation of stack. Now we keep the tree
 *		with root in dummy + pointer (gc_current) to one of the nodes.
 *		Stack is represented as path from gc_current to dummy. Unmark
 *		now means "add to tree". Push == "make it a son of gc_current".
 *		Pop == "move gc_current to parent". We keep only pointers to
 *		parents (->gc_tree).
 *	AV		1 Mar 1999
 *		Damn. Added missing check for ->dead in listen queues scanning.
 *
 *	Miklos Szeredi 25 Jun 2007
 *		Reimplement with a cycle collecting algorithm. This should
 *		solve several problems with the previous code, like being racy
 *		wrt receive and holding up unrelated socket operations.
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/wait.h>

#include <net/sock.h>
#include <net/af_unix.h>
#include <net/scm.h>
#include <net/tcp_states.h>

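/* Return the AF_UNIX socket behind @filp, or NULL if @filp was opened
 * with O_PATH, is not a socket, or belongs to another protocol family.
 */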
struct unix_sock *unix_get_socket(struct file *filp)
{
	struct inode *inode = file_inode(filp);

	/* Socket ? */
	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
		struct socket *sock = SOCKET_I(inode);
		const struct proto_ops *ops;
		struct sock *sk = sock->sk;

		ops = READ_ONCE(sock->ops);

		/* PF_UNIX ? */
		if (sk && ops && ops->family == PF_UNIX)
			return unix_sk(sk);
	}

	return NULL;
}

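/* In-flight fds form a directed graph: each vertex is a unix_sock with
 * at least one of its own fds in flight, and each edge points from the
 * socket whose fd is in flight (predecessor) to the socket whose
 * receive queue carries it (successor).  unix_unvisited_vertices links
 * all vertices not yet visited by the current graph traversal.
 */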
static LIST_HEAD(unix_unvisited_vertices);

enum unix_vertex_index {
	UNIX_VERTEX_INDEX_UNVISITED,
	UNIX_VERTEX_INDEX_START,
};

static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
	struct unix_vertex *vertex = edge->predecessor->vertex;

	if (!vertex) {
		vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
		vertex->out_degree = 0;
		INIT_LIST_HEAD(&vertex->edges);

		list_move_tail(&vertex->entry, &unix_unvisited_vertices);
		edge->predecessor->vertex = vertex;
	}

	vertex->out_degree++;
	list_add_tail(&edge->vertex_entry, &vertex->edges);
}

static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
	struct unix_vertex *vertex = edge->predecessor->vertex;

	list_del(&edge->vertex_entry);
	vertex->out_degree--;

	if (!vertex->out_degree) {
		edge->predecessor->vertex = NULL;
		list_move_tail(&vertex->entry, &fpl->vertices);
	}
}

static void unix_free_vertices(struct scm_fp_list *fpl)
{
	struct unix_vertex *vertex, *next_vertex;

	list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
		list_del(&vertex->entry);
		kfree(vertex);
	}
}

DEFINE_SPINLOCK(unix_gc_lock);
unsigned int unix_tot_inflight;

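/* Register each AF_UNIX fd in @fpl as an edge from its in-flight
 * socket to @receiver.  The vertices and edges were preallocated by
 * unix_prepare_fpl(), so nothing is allocated while the non-sleepable
 * unix_gc_lock is held; leftover vertices are freed at the end.
 */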
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
	int i = 0, j = 0;

	spin_lock(&unix_gc_lock);

	if (!fpl->count_unix)
		goto out;

	do {
		struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
		struct unix_edge *edge;

		if (!inflight)
			continue;

		edge = fpl->edges + i++;
		edge->predecessor = inflight;
		edge->successor = receiver;

		unix_add_edge(fpl, edge);
	} while (i < fpl->count_unix);

	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
out:
	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);

	spin_unlock(&unix_gc_lock);

	fpl->inflight = true;

	unix_free_vertices(fpl);
}

void unix_del_edges(struct scm_fp_list *fpl)
{
	int i = 0;

	spin_lock(&unix_gc_lock);

	if (!fpl->count_unix)
		goto out;

	do {
		struct unix_edge *edge = fpl->edges + i++;

		unix_del_edge(fpl, edge);
	} while (i < fpl->count_unix);

	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
out:
	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);

	spin_unlock(&unix_gc_lock);

	fpl->inflight = false;
}

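/* Preallocate one vertex per AF_UNIX fd plus the edge array with
 * sleepable allocations here, so that unix_add_edges() can consume
 * them later without allocating under the non-sleepable unix_gc_lock.
 * Roughly: unix_prepare_fpl() when fds are attached, unix_add_edges()
 * when the skb is queued, unix_destroy_fpl() when the scm_fp_list is
 * torn down.
 */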
int unix_prepare_fpl(struct scm_fp_list *fpl)
{
	struct unix_vertex *vertex;
	int i;

	if (!fpl->count_unix)
		return 0;

	for (i = 0; i < fpl->count_unix; i++) {
		vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
		if (!vertex)
			goto err;

		list_add(&vertex->entry, &fpl->vertices);
	}

	fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
				    GFP_KERNEL_ACCOUNT);
	if (!fpl->edges)
		goto err;

	return 0;

err:
	unix_free_vertices(fpl);
	return -ENOMEM;
}

void unix_destroy_fpl(struct scm_fp_list *fpl)
{
	if (fpl->inflight)
		unix_del_edges(fpl);

	kvfree(fpl->edges);
	unix_free_vertices(fpl);
}

static LIST_HEAD(unix_visited_vertices);

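/* Iterative depth-first walk starting from @vertex.  The current DFS
 * path is kept as a stack of edges (edge_stack) instead of recursing,
 * and every visited vertex gets a monotonically increasing index,
 * laying the groundwork for strongly connected component detection.
 */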
static void __unix_walk_scc(struct unix_vertex *vertex)
{
	unsigned long index = UNIX_VERTEX_INDEX_START;
	struct unix_edge *edge;
	LIST_HEAD(edge_stack);

next_vertex:
	vertex->index = index;
	index++;

	/* Explore neighbour vertices (receivers of the current vertex's fd). */
	list_for_each_entry(edge, &vertex->edges, vertex_entry) {
		struct unix_vertex *next_vertex = edge->successor->vertex;

		if (!next_vertex)
			continue;

		if (next_vertex->index == UNIX_VERTEX_INDEX_UNVISITED) {
			/* Iterative deepening depth first search
			 *
			 *   1. Push a forward edge to edge_stack and set
			 *      the successor to vertex for the next iteration.
			 */
			list_add(&edge->stack_entry, &edge_stack);

			vertex = next_vertex;
			goto next_vertex;

			/*   2. Pop the edge directed to the current vertex
			 *      and restore the ancestor for backtracking.
			 */
prev_vertex:
			edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
			list_del_init(&edge->stack_entry);

			vertex = edge->predecessor->vertex;
		}
	}

	/* Don't restart DFS from this vertex in unix_walk_scc(). */
	list_move_tail(&vertex->entry, &unix_visited_vertices);

	/* Need backtracking ? */
	if (!list_empty(&edge_stack))
		goto prev_vertex;
}

static void unix_walk_scc(void)
{
	struct unix_vertex *vertex;

	list_for_each_entry(vertex, &unix_unvisited_vertices, entry)
		vertex->index = UNIX_VERTEX_INDEX_UNVISITED;

	/* Visit every vertex exactly once.
	 * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
	 */
	while (!list_empty(&unix_unvisited_vertices)) {
		vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
		__unix_walk_scc(vertex);
	}

	list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
}

static LIST_HEAD(gc_candidates);
static LIST_HEAD(gc_inflight_list);

/* Keep an in-flight count for the file descriptor if it is
 * for an AF_UNIX socket.
 */
void unix_inflight(struct user_struct *user, struct file *filp)
{
	struct unix_sock *u = unix_get_socket(filp);

	spin_lock(&unix_gc_lock);

	if (u) {
		if (!u->inflight) {
			WARN_ON_ONCE(!list_empty(&u->link));
			list_add_tail(&u->link, &gc_inflight_list);
		} else {
			WARN_ON_ONCE(list_empty(&u->link));
		}
		u->inflight++;
	}

	spin_unlock(&unix_gc_lock);
}

void unix_notinflight(struct user_struct *user, struct file *filp)
{
	struct unix_sock *u = unix_get_socket(filp);

	spin_lock(&unix_gc_lock);

	if (u) {
		WARN_ON_ONCE(!u->inflight);
		WARN_ON_ONCE(list_empty(&u->link));

		u->inflight--;
		if (!u->inflight)
			list_del_init(&u->link);
	}

	spin_unlock(&unix_gc_lock);
}

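/* Walk the receive queue of @x and apply @func to every AF_UNIX socket
 * queued there that is a GC candidate.  If @hitlist is non-NULL, skbs
 * carrying at least one candidate are unlinked from the queue and
 * collected on @hitlist for later purging.
 */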
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
			  struct sk_buff_head *hitlist)
{
	struct sk_buff *skb;
	struct sk_buff *next;

	spin_lock(&x->sk_receive_queue.lock);
	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
		/* Do we have file descriptors ? */
		if (UNIXCB(skb).fp) {
			bool hit = false;
			/* Process the descriptors of this socket */
			int nfd = UNIXCB(skb).fp->count;
			struct file **fp = UNIXCB(skb).fp->fp;

			while (nfd--) {
				/* Get the socket the fd refers to, if any */
				struct unix_sock *u = unix_get_socket(*fp++);

				/* Ignore non-candidates; they could have been
				 * added to the queues after the garbage
				 * collection started.
				 */
				if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
					hit = true;

					func(u);
				}
			}
			if (hit && hitlist) {
				__skb_unlink(skb, &x->sk_receive_queue);
				__skb_queue_tail(hitlist, skb);
			}
		}
	}
	spin_unlock(&x->sk_receive_queue.lock);
}

static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
			  struct sk_buff_head *hitlist)
{
	if (x->sk_state != TCP_LISTEN) {
		scan_inflight(x, func, hitlist);
	} else {
		struct sk_buff *skb;
		struct sk_buff *next;
		struct unix_sock *u;
		LIST_HEAD(embryos);

		/* For a listening socket collect the queued embryos
		 * and perform a scan on them as well.
		 */
		spin_lock(&x->sk_receive_queue.lock);
		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
			u = unix_sk(skb->sk);

			/* An embryo cannot be in-flight, so it's safe
			 * to use the list link.
			 */
			WARN_ON_ONCE(!list_empty(&u->link));
			list_add_tail(&u->link, &embryos);
		}
		spin_unlock(&x->sk_receive_queue.lock);

		while (!list_empty(&embryos)) {
			u = list_entry(embryos.next, struct unix_sock, link);
			scan_inflight(&u->sk, func, hitlist);
			list_del_init(&u->link);
		}
	}
}

static void dec_inflight(struct unix_sock *usk)
{
	usk->inflight--;
}

static void inc_inflight(struct unix_sock *usk)
{
	usk->inflight++;
}

static void inc_inflight_move_tail(struct unix_sock *u)
{
	u->inflight++;

	/* If this still might be part of a cycle, move it to the end
	 * of the list, so that it's checked even if it was already
	 * passed over.
	 */
	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
		list_move_tail(&u->link, &gc_candidates);
}

static bool gc_in_progress;

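/* The cycle collector works in phases: (1) select candidates, i.e.
 * in-flight sockets whose file reference count is fully accounted for
 * by in-flight skbs; (2) subtract the references candidates hold on
 * each other's children; (3) recursively restore the counts of every
 * candidate still referenced from outside; (4) whatever remains forms
 * cycles, so collect and purge the skbs pinning them.
 */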
static void __unix_gc(struct work_struct *work)
{
	struct sk_buff_head hitlist;
	struct unix_sock *u, *next;
	LIST_HEAD(not_cycle_list);
	struct list_head cursor;

	spin_lock(&unix_gc_lock);

	unix_walk_scc();

	/* First, select candidates for garbage collection.  Only
	 * in-flight sockets are considered, and from those only ones
	 * which don't have any external reference.
	 *
	 * Holding unix_gc_lock will protect these candidates from
	 * being detached, and hence from gaining an external
	 * reference.  Since there are no possible receivers, all
	 * buffers currently on the candidates' queues stay there
	 * during the garbage collection.
	 *
	 * We also know that no new candidate can be added onto the
	 * receive queues.  Other, non-candidate sockets _can_ be
	 * added to the queues, so we must make sure only to touch
	 * candidates.
	 */
	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
		long total_refs;

		total_refs = file_count(u->sk.sk_socket->file);

		WARN_ON_ONCE(!u->inflight);
		WARN_ON_ONCE(total_refs < u->inflight);
		if (total_refs == u->inflight) {
			list_move_tail(&u->link, &gc_candidates);
			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
		}
	}

	/* Now remove all internal in-flight references to children of
	 * the candidates.
	 */
	list_for_each_entry(u, &gc_candidates, link)
		scan_children(&u->sk, dec_inflight, NULL);

	/* Restore the references for children of all candidates
	 * which still have remaining references.  Do this
	 * recursively, so that only the sockets forming cyclic
	 * references remain.
	 *
	 * Use a "cursor" link to make the list traversal safe, even
	 * though elements might be moved about.
	 */
	list_add(&cursor, &gc_candidates);
	while (cursor.next != &gc_candidates) {
		u = list_entry(cursor.next, struct unix_sock, link);

		/* Move cursor to after the current position. */
		list_move(&cursor, &u->link);

		if (u->inflight) {
			list_move_tail(&u->link, &not_cycle_list);
			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
			scan_children(&u->sk, inc_inflight_move_tail, NULL);
		}
	}
	list_del(&cursor);

	/* Now gc_candidates contains only garbage.  Restore original
	 * inflight counters for these as well, and remove the skbuffs
	 * which are creating the cycle(s).
	 */
	skb_queue_head_init(&hitlist);
	list_for_each_entry(u, &gc_candidates, link) {
		scan_children(&u->sk, inc_inflight, &hitlist);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (u->oob_skb) {
			kfree_skb(u->oob_skb);
			u->oob_skb = NULL;
		}
#endif
	}

	/* not_cycle_list contains those sockets which do not make up a
	 * cycle.  Restore these to the inflight list.
	 */
	while (!list_empty(&not_cycle_list)) {
		u = list_entry(not_cycle_list.next, struct unix_sock, link);
		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
		list_move_tail(&u->link, &gc_inflight_list);
	}

	spin_unlock(&unix_gc_lock);

	/* Here we are. Hitlist is filled. Die. */
	__skb_queue_purge(&hitlist);

	spin_lock(&unix_gc_lock);

	/* All candidates should have been detached by now. */
	WARN_ON_ONCE(!list_empty(&gc_candidates));

	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
	WRITE_ONCE(gc_in_progress, false);

	spin_unlock(&unix_gc_lock);
}


static DECLARE_WORK(unix_gc_work, __unix_gc);

void unix_gc(void)
{
	WRITE_ONCE(gc_in_progress, true);
	queue_work(system_unbound_wq, &unix_gc_work);
}

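/* Force a garbage collection once this many fds are in flight in
 * total, and make a sender wait for the collector once it alone has
 * this many fds still unreceived.
 */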
#define UNIX_INFLIGHT_TRIGGER_GC 16000
#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)

void wait_for_unix_gc(struct scm_fp_list *fpl)
{
	/* If the number of in-flight sockets is insane,
	 * force a garbage collect right now.
	 *
	 * Paired with the WRITE_ONCE() in unix_add_edges(),
	 * unix_del_edges(), and __unix_gc().
	 */
	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
	    !READ_ONCE(gc_in_progress))
		unix_gc();

	/* Penalise users who want to send AF_UNIX sockets
	 * but whose sockets have not been received yet.
	 */
	if (!fpl || !fpl->count_unix ||
	    READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
		return;

	if (READ_ONCE(gc_in_progress))
		flush_work(&unix_gc_work);
}
596