1a85036f6SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * NET3: Garbage Collector For AF_UNIX sockets
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * Garbage Collector:
61da177e4SLinus Torvalds * Copyright (C) Barak A. Pearlmutter.
71da177e4SLinus Torvalds *
81da177e4SLinus Torvalds * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
91da177e4SLinus Torvalds * If it doesn't work blame me, it worked when Barak sent it.
101da177e4SLinus Torvalds *
111da177e4SLinus Torvalds * Assumptions:
121da177e4SLinus Torvalds *
131da177e4SLinus Torvalds * - object w/ a bit
141da177e4SLinus Torvalds * - free list
151da177e4SLinus Torvalds *
161da177e4SLinus Torvalds * Current optimizations:
171da177e4SLinus Torvalds *
181da177e4SLinus Torvalds * - explicit stack instead of recursion
191da177e4SLinus Torvalds * - tail recurse on first born instead of immediate push/pop
201da177e4SLinus Torvalds * - we gather the stuff that should not be killed into tree
211da177e4SLinus Torvalds * and stack is just a path from root to the current pointer.
221da177e4SLinus Torvalds *
231da177e4SLinus Torvalds * Future optimizations:
241da177e4SLinus Torvalds *
251da177e4SLinus Torvalds * - don't just push entire root set; process in place
261da177e4SLinus Torvalds *
271da177e4SLinus Torvalds * Fixes:
281da177e4SLinus Torvalds * Alan Cox 07 Sept 1997 Vmalloc internal stack as needed.
291da177e4SLinus Torvalds * Cope with changing max_files.
301da177e4SLinus Torvalds * Al Viro 11 Oct 1998
311da177e4SLinus Torvalds * Graph may have cycles. That is, we can send the descriptor
321da177e4SLinus Torvalds * of foo to bar and vice versa. Current code chokes on that.
331da177e4SLinus Torvalds * Fix: move SCM_RIGHTS ones into the separate list and then
341da177e4SLinus Torvalds * skb_free() them all instead of doing explicit fput's.
351da177e4SLinus Torvalds * Another problem: since fput() may block somebody may
361da177e4SLinus Torvalds * create a new unix_socket when we are in the middle of sweep
371da177e4SLinus Torvalds * phase. Fix: revert the logic wrt MARKED. Mark everything
381da177e4SLinus Torvalds * upon the beginning and unmark non-junk ones.
391da177e4SLinus Torvalds *
401da177e4SLinus Torvalds * [12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
411da177e4SLinus Torvalds * sent to connect()'ed but still not accept()'ed sockets.
421da177e4SLinus Torvalds * Fixed. Old code had slightly different problem here:
431da177e4SLinus Torvalds * extra fput() in situation when we passed the descriptor via
441da177e4SLinus Torvalds * such socket and closed it (descriptor). That would happen on
451da177e4SLinus Torvalds * each unix_gc() until the accept(). Since the struct file in
461da177e4SLinus Torvalds * question would go to the free list and might be reused...
471da177e4SLinus Torvalds * That might be the reason of random oopses on filp_close()
481da177e4SLinus Torvalds * in unrelated processes.
491da177e4SLinus Torvalds *
501da177e4SLinus Torvalds * AV 28 Feb 1999
511da177e4SLinus Torvalds * Kill the explicit allocation of stack. Now we keep the tree
521da177e4SLinus Torvalds * with root in dummy + pointer (gc_current) to one of the nodes.
531da177e4SLinus Torvalds * Stack is represented as path from gc_current to dummy. Unmark
541da177e4SLinus Torvalds * now means "add to tree". Push == "make it a son of gc_current".
551da177e4SLinus Torvalds * Pop == "move gc_current to parent". We keep only pointers to
561da177e4SLinus Torvalds * parents (->gc_tree).
571da177e4SLinus Torvalds * AV 1 Mar 1999
581da177e4SLinus Torvalds * Damn. Added missing check for ->dead in listen queues scanning.
591da177e4SLinus Torvalds *
601fd05ba5SMiklos Szeredi * Miklos Szeredi 25 Jun 2007
611fd05ba5SMiklos Szeredi * Reimplement with a cycle collecting algorithm. This should
621fd05ba5SMiklos Szeredi * solve several problems with the previous code, like being racy
631fd05ba5SMiklos Szeredi * wrt receive and holding up unrelated socket operations.
641da177e4SLinus Torvalds */
651da177e4SLinus Torvalds
661da177e4SLinus Torvalds #include <linux/kernel.h>
671da177e4SLinus Torvalds #include <linux/string.h>
681da177e4SLinus Torvalds #include <linux/socket.h>
691da177e4SLinus Torvalds #include <linux/un.h>
701da177e4SLinus Torvalds #include <linux/net.h>
711da177e4SLinus Torvalds #include <linux/fs.h>
721da177e4SLinus Torvalds #include <linux/skbuff.h>
731da177e4SLinus Torvalds #include <linux/netdevice.h>
741da177e4SLinus Torvalds #include <linux/file.h>
751da177e4SLinus Torvalds #include <linux/proc_fs.h>
764a3e2f71SArjan van de Ven #include <linux/mutex.h>
775f23b734Sdann frazier #include <linux/wait.h>
781da177e4SLinus Torvalds
791da177e4SLinus Torvalds #include <net/sock.h>
801da177e4SLinus Torvalds #include <net/af_unix.h>
811da177e4SLinus Torvalds #include <net/scm.h>
82c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h>
831da177e4SLinus Torvalds
unix_get_socket(struct file * filp)8499a7a5b9SKuniyuki Iwashima struct unix_sock *unix_get_socket(struct file *filp)
8599a7a5b9SKuniyuki Iwashima {
8699a7a5b9SKuniyuki Iwashima struct inode *inode = file_inode(filp);
87f4e65870SJens Axboe
8899a7a5b9SKuniyuki Iwashima /* Socket ? */
8999a7a5b9SKuniyuki Iwashima if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
9099a7a5b9SKuniyuki Iwashima struct socket *sock = SOCKET_I(inode);
9199a7a5b9SKuniyuki Iwashima const struct proto_ops *ops;
9299a7a5b9SKuniyuki Iwashima struct sock *sk = sock->sk;
931da177e4SLinus Torvalds
9499a7a5b9SKuniyuki Iwashima ops = READ_ONCE(sock->ops);
9599a7a5b9SKuniyuki Iwashima
9699a7a5b9SKuniyuki Iwashima /* PF_UNIX ? */
9799a7a5b9SKuniyuki Iwashima if (sk && ops && ops->family == PF_UNIX)
9899a7a5b9SKuniyuki Iwashima return unix_sk(sk);
9999a7a5b9SKuniyuki Iwashima }
10099a7a5b9SKuniyuki Iwashima
10199a7a5b9SKuniyuki Iwashima return NULL;
10299a7a5b9SKuniyuki Iwashima }
10399a7a5b9SKuniyuki Iwashima
unix_edge_successor(struct unix_edge * edge)104dcf70df2SKuniyuki Iwashima static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
105dcf70df2SKuniyuki Iwashima {
106dcf70df2SKuniyuki Iwashima /* If an embryo socket has a fd,
107dcf70df2SKuniyuki Iwashima * the listener indirectly holds the fd's refcnt.
108dcf70df2SKuniyuki Iwashima */
109dcf70df2SKuniyuki Iwashima if (edge->successor->listener)
110dcf70df2SKuniyuki Iwashima return unix_sk(edge->successor->listener)->vertex;
111dcf70df2SKuniyuki Iwashima
112dcf70df2SKuniyuki Iwashima return edge->successor->vertex;
113dcf70df2SKuniyuki Iwashima }
114dcf70df2SKuniyuki Iwashima
/* True if the inflight graph might contain a cycle; checked by
 * __unix_gc() to skip a pointless walk.  Cleared at the start of each
 * SCC walk, re-set whenever a relevant edge changes.
 */
static bool unix_graph_maybe_cyclic;
/* True once unix_walk_scc() has grouped all vertices into SCCs, so
 * the next GC can take the unix_walk_scc_fast() path.  Invalidated by
 * any edge change (unix_update_graph()).
 */
static bool unix_graph_grouped;

/* Record that the graph changed around @vertex: a cycle may now exist
 * and the cached SCC grouping is stale.  Called under unix_gc_lock.
 */
static void unix_update_graph(struct unix_vertex *vertex)
{
	/* If the receiver socket is not inflight, no cyclic
	 * reference could be formed.
	 */
	if (!vertex)
		return;

	unix_graph_maybe_cyclic = true;
	unix_graph_grouped = false;
}
12977e5593aSKuniyuki Iwashima
/* Inflight vertices not yet grouped by the current SCC walk;
 * unix_walk_scc() moves them to unix_visited_vertices as it goes.
 */
static LIST_HEAD(unix_unvisited_vertices);

/* vertex->index values below UNIX_VERTEX_INDEX_START are markers: one
 * of MARK1/MARK2 means "not yet visited", the other "already grouped
 * into an SCC".  The two roles are swapped after each full walk (see
 * unix_walk_scc()) so indices never need to be reset.
 */
enum unix_vertex_index {
	UNIX_VERTEX_INDEX_MARK1,
	UNIX_VERTEX_INDEX_MARK2,
	UNIX_VERTEX_INDEX_START,
};

static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1;

/* Link @edge into the graph on its predecessor's vertex, materialising
 * the vertex from @fpl's preallocated pool if the sending socket was
 * not inflight before.  Called under unix_gc_lock.
 */
static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
	struct unix_vertex *vertex = edge->predecessor->vertex;

	if (!vertex) {
		/* First inflight reference: consume one preallocated vertex. */
		vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
		vertex->index = unix_vertex_unvisited_index;
		vertex->out_degree = 0;
		INIT_LIST_HEAD(&vertex->edges);
		INIT_LIST_HEAD(&vertex->scc_entry);

		/* Move off @fpl's pool so unix_free_vertices() won't free it. */
		list_move_tail(&vertex->entry, &unix_unvisited_vertices);
		edge->predecessor->vertex = vertex;
	}

	vertex->out_degree++;
	list_add_tail(&edge->vertex_entry, &vertex->edges);

	/* A new edge may create a cycle through the receiver. */
	unix_update_graph(unix_edge_successor(edge));
}
16042f298c0SKuniyuki Iwashima
unix_del_edge(struct scm_fp_list * fpl,struct unix_edge * edge)16142f298c0SKuniyuki Iwashima static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
16242f298c0SKuniyuki Iwashima {
16342f298c0SKuniyuki Iwashima struct unix_vertex *vertex = edge->predecessor->vertex;
16442f298c0SKuniyuki Iwashima
1657172dc93SKuniyuki Iwashima if (!fpl->dead)
16677e5593aSKuniyuki Iwashima unix_update_graph(unix_edge_successor(edge));
16777e5593aSKuniyuki Iwashima
16842f298c0SKuniyuki Iwashima list_del(&edge->vertex_entry);
16942f298c0SKuniyuki Iwashima vertex->out_degree--;
17042f298c0SKuniyuki Iwashima
17142f298c0SKuniyuki Iwashima if (!vertex->out_degree) {
17242f298c0SKuniyuki Iwashima edge->predecessor->vertex = NULL;
17342f298c0SKuniyuki Iwashima list_move_tail(&vertex->entry, &fpl->vertices);
17442f298c0SKuniyuki Iwashima }
17542f298c0SKuniyuki Iwashima }
17642f298c0SKuniyuki Iwashima
unix_free_vertices(struct scm_fp_list * fpl)1771fbfdfaaSKuniyuki Iwashima static void unix_free_vertices(struct scm_fp_list *fpl)
1781fbfdfaaSKuniyuki Iwashima {
1791fbfdfaaSKuniyuki Iwashima struct unix_vertex *vertex, *next_vertex;
1801fbfdfaaSKuniyuki Iwashima
1811fbfdfaaSKuniyuki Iwashima list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
1821fbfdfaaSKuniyuki Iwashima list_del(&vertex->entry);
1831fbfdfaaSKuniyuki Iwashima kfree(vertex);
1841fbfdfaaSKuniyuki Iwashima }
1851fbfdfaaSKuniyuki Iwashima }
1861fbfdfaaSKuniyuki Iwashima
/* Protects the inflight graph and its global bookkeeping. */
static DEFINE_SPINLOCK(unix_gc_lock);
/* Total number of AF_UNIX sockets currently inflight.  Read locklessly
 * in wait_for_unix_gc(), hence the WRITE_ONCE() updates below.
 */
unsigned int unix_tot_inflight;

/* Register every AF_UNIX fd carried by @fpl as inflight towards
 * @receiver, using the vertices/edges preallocated by
 * unix_prepare_fpl(), and update the inflight accounting.
 */
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
	int i = 0, j = 0;

	spin_lock(&unix_gc_lock);

	if (!fpl->count_unix)
		goto out;

	do {
		/* j walks all fds; i counts only the AF_UNIX ones. */
		struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
		struct unix_edge *edge;

		/* Not an AF_UNIX socket: no edge needed. */
		if (!inflight)
			continue;

		edge = fpl->edges + i++;
		edge->predecessor = inflight;
		edge->successor = receiver;

		unix_add_edge(fpl, edge);
	} while (i < fpl->count_unix);

	receiver->scm_stat.nr_unix_fds += fpl->count_unix;
	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
out:
	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);

	spin_unlock(&unix_gc_lock);

	/* Edges are live in the graph now; unix_destroy_fpl() must
	 * withdraw them via unix_del_edges().
	 */
	fpl->inflight = true;

	/* Preallocated vertices not consumed by unix_add_edge() are
	 * no longer needed.
	 */
	unix_free_vertices(fpl);
}
22442f298c0SKuniyuki Iwashima
unix_del_edges(struct scm_fp_list * fpl)22542f298c0SKuniyuki Iwashima void unix_del_edges(struct scm_fp_list *fpl)
22642f298c0SKuniyuki Iwashima {
227fd863448SKuniyuki Iwashima struct unix_sock *receiver;
22842f298c0SKuniyuki Iwashima int i = 0;
22942f298c0SKuniyuki Iwashima
23042f298c0SKuniyuki Iwashima spin_lock(&unix_gc_lock);
23142f298c0SKuniyuki Iwashima
23242f298c0SKuniyuki Iwashima if (!fpl->count_unix)
23342f298c0SKuniyuki Iwashima goto out;
23442f298c0SKuniyuki Iwashima
23542f298c0SKuniyuki Iwashima do {
23642f298c0SKuniyuki Iwashima struct unix_edge *edge = fpl->edges + i++;
23742f298c0SKuniyuki Iwashima
23842f298c0SKuniyuki Iwashima unix_del_edge(fpl, edge);
23942f298c0SKuniyuki Iwashima } while (i < fpl->count_unix);
24042f298c0SKuniyuki Iwashima
2417172dc93SKuniyuki Iwashima if (!fpl->dead) {
242fd863448SKuniyuki Iwashima receiver = fpl->edges[0].successor;
243fd863448SKuniyuki Iwashima receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
2441af2dfacSKuniyuki Iwashima }
24522c3c0c5SKuniyuki Iwashima WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
24642f298c0SKuniyuki Iwashima out:
24722c3c0c5SKuniyuki Iwashima WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
24822c3c0c5SKuniyuki Iwashima
24942f298c0SKuniyuki Iwashima spin_unlock(&unix_gc_lock);
25042f298c0SKuniyuki Iwashima
25142f298c0SKuniyuki Iwashima fpl->inflight = false;
25242f298c0SKuniyuki Iwashima }
25342f298c0SKuniyuki Iwashima
unix_update_edges(struct unix_sock * receiver)254dcf70df2SKuniyuki Iwashima void unix_update_edges(struct unix_sock *receiver)
255dcf70df2SKuniyuki Iwashima {
256fd863448SKuniyuki Iwashima /* nr_unix_fds is only updated under unix_state_lock().
257fd863448SKuniyuki Iwashima * If it's 0 here, the embryo socket is not part of the
258fd863448SKuniyuki Iwashima * inflight graph, and GC will not see it, so no lock needed.
259fd863448SKuniyuki Iwashima */
260fd863448SKuniyuki Iwashima if (!receiver->scm_stat.nr_unix_fds) {
261fd863448SKuniyuki Iwashima receiver->listener = NULL;
262fd863448SKuniyuki Iwashima } else {
263dcf70df2SKuniyuki Iwashima spin_lock(&unix_gc_lock);
26477e5593aSKuniyuki Iwashima unix_update_graph(unix_sk(receiver->listener)->vertex);
265dcf70df2SKuniyuki Iwashima receiver->listener = NULL;
266dcf70df2SKuniyuki Iwashima spin_unlock(&unix_gc_lock);
267dcf70df2SKuniyuki Iwashima }
268fd863448SKuniyuki Iwashima }
269dcf70df2SKuniyuki Iwashima
unix_prepare_fpl(struct scm_fp_list * fpl)2701fbfdfaaSKuniyuki Iwashima int unix_prepare_fpl(struct scm_fp_list *fpl)
2711fbfdfaaSKuniyuki Iwashima {
2721fbfdfaaSKuniyuki Iwashima struct unix_vertex *vertex;
2731fbfdfaaSKuniyuki Iwashima int i;
2741fbfdfaaSKuniyuki Iwashima
2751fbfdfaaSKuniyuki Iwashima if (!fpl->count_unix)
2761fbfdfaaSKuniyuki Iwashima return 0;
2771fbfdfaaSKuniyuki Iwashima
2781fbfdfaaSKuniyuki Iwashima for (i = 0; i < fpl->count_unix; i++) {
2791fbfdfaaSKuniyuki Iwashima vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
2801fbfdfaaSKuniyuki Iwashima if (!vertex)
2811fbfdfaaSKuniyuki Iwashima goto err;
2821fbfdfaaSKuniyuki Iwashima
2831fbfdfaaSKuniyuki Iwashima list_add(&vertex->entry, &fpl->vertices);
2841fbfdfaaSKuniyuki Iwashima }
2851fbfdfaaSKuniyuki Iwashima
28629b64e35SKuniyuki Iwashima fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
28729b64e35SKuniyuki Iwashima GFP_KERNEL_ACCOUNT);
28829b64e35SKuniyuki Iwashima if (!fpl->edges)
28929b64e35SKuniyuki Iwashima goto err;
29029b64e35SKuniyuki Iwashima
2911fbfdfaaSKuniyuki Iwashima return 0;
2921fbfdfaaSKuniyuki Iwashima
2931fbfdfaaSKuniyuki Iwashima err:
2941fbfdfaaSKuniyuki Iwashima unix_free_vertices(fpl);
2951fbfdfaaSKuniyuki Iwashima return -ENOMEM;
2961fbfdfaaSKuniyuki Iwashima }
2971fbfdfaaSKuniyuki Iwashima
unix_destroy_fpl(struct scm_fp_list * fpl)2981fbfdfaaSKuniyuki Iwashima void unix_destroy_fpl(struct scm_fp_list *fpl)
2991fbfdfaaSKuniyuki Iwashima {
30042f298c0SKuniyuki Iwashima if (fpl->inflight)
30142f298c0SKuniyuki Iwashima unix_del_edges(fpl);
30242f298c0SKuniyuki Iwashima
30329b64e35SKuniyuki Iwashima kvfree(fpl->edges);
3041fbfdfaaSKuniyuki Iwashima unix_free_vertices(fpl);
3051fbfdfaaSKuniyuki Iwashima }
3061fbfdfaaSKuniyuki Iwashima
unix_vertex_dead(struct unix_vertex * vertex)307a15702d8SKuniyuki Iwashima static bool unix_vertex_dead(struct unix_vertex *vertex)
308a15702d8SKuniyuki Iwashima {
309a15702d8SKuniyuki Iwashima struct unix_edge *edge;
310a15702d8SKuniyuki Iwashima struct unix_sock *u;
311a15702d8SKuniyuki Iwashima long total_ref;
312a15702d8SKuniyuki Iwashima
313a15702d8SKuniyuki Iwashima list_for_each_entry(edge, &vertex->edges, vertex_entry) {
314a15702d8SKuniyuki Iwashima struct unix_vertex *next_vertex = unix_edge_successor(edge);
315a15702d8SKuniyuki Iwashima
316a15702d8SKuniyuki Iwashima /* The vertex's fd can be received by a non-inflight socket. */
317a15702d8SKuniyuki Iwashima if (!next_vertex)
318a15702d8SKuniyuki Iwashima return false;
319a15702d8SKuniyuki Iwashima
320a15702d8SKuniyuki Iwashima /* The vertex's fd can be received by an inflight socket in
321a15702d8SKuniyuki Iwashima * another SCC.
322a15702d8SKuniyuki Iwashima */
323a15702d8SKuniyuki Iwashima if (next_vertex->scc_index != vertex->scc_index)
324a15702d8SKuniyuki Iwashima return false;
325a15702d8SKuniyuki Iwashima }
326a15702d8SKuniyuki Iwashima
327a15702d8SKuniyuki Iwashima /* No receiver exists out of the same SCC. */
328a15702d8SKuniyuki Iwashima
329a15702d8SKuniyuki Iwashima edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
330a15702d8SKuniyuki Iwashima u = edge->predecessor;
331a15702d8SKuniyuki Iwashima total_ref = file_count(u->sk.sk_socket->file);
332a15702d8SKuniyuki Iwashima
333a15702d8SKuniyuki Iwashima /* If not close()d, total_ref > out_degree. */
334a15702d8SKuniyuki Iwashima if (total_ref != vertex->out_degree)
335a15702d8SKuniyuki Iwashima return false;
336a15702d8SKuniyuki Iwashima
337a15702d8SKuniyuki Iwashima return true;
338a15702d8SKuniyuki Iwashima }
339a15702d8SKuniyuki Iwashima
unix_collect_skb(struct list_head * scc,struct sk_buff_head * hitlist)3404090fa37SKuniyuki Iwashima static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
3414090fa37SKuniyuki Iwashima {
3424090fa37SKuniyuki Iwashima struct unix_vertex *vertex;
3434090fa37SKuniyuki Iwashima
3444090fa37SKuniyuki Iwashima list_for_each_entry_reverse(vertex, scc, scc_entry) {
3454090fa37SKuniyuki Iwashima struct sk_buff_head *queue;
3464090fa37SKuniyuki Iwashima struct unix_edge *edge;
3474090fa37SKuniyuki Iwashima struct unix_sock *u;
3484090fa37SKuniyuki Iwashima
3494090fa37SKuniyuki Iwashima edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
3504090fa37SKuniyuki Iwashima u = edge->predecessor;
3514090fa37SKuniyuki Iwashima queue = &u->sk.sk_receive_queue;
3524090fa37SKuniyuki Iwashima
3534090fa37SKuniyuki Iwashima spin_lock(&queue->lock);
3544090fa37SKuniyuki Iwashima
3554090fa37SKuniyuki Iwashima if (u->sk.sk_state == TCP_LISTEN) {
3564090fa37SKuniyuki Iwashima struct sk_buff *skb;
3574090fa37SKuniyuki Iwashima
3584090fa37SKuniyuki Iwashima skb_queue_walk(queue, skb) {
3594090fa37SKuniyuki Iwashima struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
3604090fa37SKuniyuki Iwashima
3618647ece4SKuniyuki Iwashima spin_lock(&embryo_queue->lock);
362*8594d9b8SKuniyuki Iwashima skb_queue_splice_init(embryo_queue, hitlist);
3634090fa37SKuniyuki Iwashima spin_unlock(&embryo_queue->lock);
3644090fa37SKuniyuki Iwashima }
3654090fa37SKuniyuki Iwashima } else {
366*8594d9b8SKuniyuki Iwashima skb_queue_splice_init(queue, hitlist);
3674090fa37SKuniyuki Iwashima }
3684090fa37SKuniyuki Iwashima
3694090fa37SKuniyuki Iwashima spin_unlock(&queue->lock);
3704090fa37SKuniyuki Iwashima }
3714090fa37SKuniyuki Iwashima }
3724090fa37SKuniyuki Iwashima
unix_scc_cyclic(struct list_head * scc)37377e5593aSKuniyuki Iwashima static bool unix_scc_cyclic(struct list_head *scc)
37477e5593aSKuniyuki Iwashima {
37577e5593aSKuniyuki Iwashima struct unix_vertex *vertex;
37677e5593aSKuniyuki Iwashima struct unix_edge *edge;
37777e5593aSKuniyuki Iwashima
37877e5593aSKuniyuki Iwashima /* SCC containing multiple vertices ? */
37977e5593aSKuniyuki Iwashima if (!list_is_singular(scc))
38077e5593aSKuniyuki Iwashima return true;
38177e5593aSKuniyuki Iwashima
38277e5593aSKuniyuki Iwashima vertex = list_first_entry(scc, typeof(*vertex), scc_entry);
38377e5593aSKuniyuki Iwashima
38477e5593aSKuniyuki Iwashima /* Self-reference or a embryo-listener circle ? */
38577e5593aSKuniyuki Iwashima list_for_each_entry(edge, &vertex->edges, vertex_entry) {
38677e5593aSKuniyuki Iwashima if (unix_edge_successor(edge) == vertex)
38777e5593aSKuniyuki Iwashima return true;
38877e5593aSKuniyuki Iwashima }
38977e5593aSKuniyuki Iwashima
39077e5593aSKuniyuki Iwashima return false;
39177e5593aSKuniyuki Iwashima }
39277e5593aSKuniyuki Iwashima
/* Vertices already grouped into an SCC during the current walk. */
static LIST_HEAD(unix_visited_vertices);
static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;

/* Iterative Tarjan-style SCC search rooted at @vertex.
 *
 * Each vertex gets a DFS number from *last_index; scc_index tracks the
 * smallest index reachable from it.  When a vertex finishes with
 * index == scc_index, it is an SCC root and the vertices above it on
 * vertex_stack form one SCC.  Recursion is replaced by the
 * next_vertex/prev_vertex gotos with edge_stack as the explicit call
 * stack.  Skbs of dead SCCs are moved to @hitlist.
 * Called under unix_gc_lock.
 */
static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
			    struct sk_buff_head *hitlist)
{
	LIST_HEAD(vertex_stack);
	struct unix_edge *edge;
	LIST_HEAD(edge_stack);

next_vertex:
	/* Push vertex to vertex_stack and mark it as on-stack
	 * (index >= UNIX_VERTEX_INDEX_START).
	 * The vertex will be popped when finalising SCC later.
	 */
	list_add(&vertex->scc_entry, &vertex_stack);

	vertex->index = *last_index;
	vertex->scc_index = *last_index;
	(*last_index)++;

	/* Explore neighbour vertices (receivers of the current vertex's fd). */
	list_for_each_entry(edge, &vertex->edges, vertex_entry) {
		struct unix_vertex *next_vertex = unix_edge_successor(edge);

		if (!next_vertex)
			continue;

		if (next_vertex->index == unix_vertex_unvisited_index) {
			/* Iterative deepening depth first search
			 *
			 * 1. Push a forward edge to edge_stack and set
			 *    the successor to vertex for the next iteration.
			 */
			list_add(&edge->stack_entry, &edge_stack);

			vertex = next_vertex;
			goto next_vertex;

			/* 2. Pop the edge directed to the current vertex
			 *    and restore the ancestor for backtracking.
			 */
prev_vertex:
			edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
			list_del_init(&edge->stack_entry);

			next_vertex = vertex;
			vertex = edge->predecessor->vertex;

			/* If the successor has a smaller scc_index, two vertices
			 * are in the same SCC, so propagate the smaller scc_index
			 * to skip SCC finalisation.
			 */
			vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
		} else if (next_vertex->index != unix_vertex_grouped_index) {
			/* Loop detected by a back/cross edge.
			 *
			 * The successor is on vertex_stack, so two vertices are in
			 * the same SCC.  If the successor has a smaller *scc_index*,
			 * propagate it to skip SCC finalisation.
			 */
			vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
		} else {
			/* The successor was already grouped as another SCC */
		}
	}

	if (vertex->index == vertex->scc_index) {
		struct unix_vertex *v;
		struct list_head scc;
		bool scc_dead = true;

		/* SCC finalised.
		 *
		 * If the scc_index was not updated, all the vertices above on
		 * vertex_stack are in the same SCC.  Group them using scc_entry.
		 */
		__list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);

		list_for_each_entry_reverse(v, &scc, scc_entry) {
			/* Don't restart DFS from this vertex in unix_walk_scc(). */
			list_move_tail(&v->entry, &unix_visited_vertices);

			/* Mark vertex as off-stack. */
			v->index = unix_vertex_grouped_index;

			/* One live vertex keeps the whole SCC alive. */
			if (scc_dead)
				scc_dead = unix_vertex_dead(v);
		}

		if (scc_dead)
			unix_collect_skb(&scc, hitlist);
		else if (!unix_graph_maybe_cyclic)
			unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);

		list_del(&scc);
	}

	/* Need backtracking ? */
	if (!list_empty(&edge_stack))
		goto prev_vertex;
}
4956ba76fd2SKuniyuki Iwashima
unix_walk_scc(struct sk_buff_head * hitlist)4964090fa37SKuniyuki Iwashima static void unix_walk_scc(struct sk_buff_head *hitlist)
4976ba76fd2SKuniyuki Iwashima {
498bfdb0128SKuniyuki Iwashima unsigned long last_index = UNIX_VERTEX_INDEX_START;
499bfdb0128SKuniyuki Iwashima
50077e5593aSKuniyuki Iwashima unix_graph_maybe_cyclic = false;
50177e5593aSKuniyuki Iwashima
5026ba76fd2SKuniyuki Iwashima /* Visit every vertex exactly once.
5036ba76fd2SKuniyuki Iwashima * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
5046ba76fd2SKuniyuki Iwashima */
5056ba76fd2SKuniyuki Iwashima while (!list_empty(&unix_unvisited_vertices)) {
506ba31b4a4SKuniyuki Iwashima struct unix_vertex *vertex;
507ba31b4a4SKuniyuki Iwashima
5086ba76fd2SKuniyuki Iwashima vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
5094090fa37SKuniyuki Iwashima __unix_walk_scc(vertex, &last_index, hitlist);
5106ba76fd2SKuniyuki Iwashima }
5116ba76fd2SKuniyuki Iwashima
5126ba76fd2SKuniyuki Iwashima list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
513ba31b4a4SKuniyuki Iwashima swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
514ad081928SKuniyuki Iwashima
515ad081928SKuniyuki Iwashima unix_graph_grouped = true;
516ad081928SKuniyuki Iwashima }
517ad081928SKuniyuki Iwashima
unix_walk_scc_fast(struct sk_buff_head * hitlist)5184090fa37SKuniyuki Iwashima static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
519ad081928SKuniyuki Iwashima {
5201af2dfacSKuniyuki Iwashima unix_graph_maybe_cyclic = false;
5211af2dfacSKuniyuki Iwashima
522ad081928SKuniyuki Iwashima while (!list_empty(&unix_unvisited_vertices)) {
523ad081928SKuniyuki Iwashima struct unix_vertex *vertex;
524ad081928SKuniyuki Iwashima struct list_head scc;
525a15702d8SKuniyuki Iwashima bool scc_dead = true;
526ad081928SKuniyuki Iwashima
527ad081928SKuniyuki Iwashima vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
528ad081928SKuniyuki Iwashima list_add(&scc, &vertex->scc_entry);
529ad081928SKuniyuki Iwashima
530a15702d8SKuniyuki Iwashima list_for_each_entry_reverse(vertex, &scc, scc_entry) {
531ad081928SKuniyuki Iwashima list_move_tail(&vertex->entry, &unix_visited_vertices);
532ad081928SKuniyuki Iwashima
533a15702d8SKuniyuki Iwashima if (scc_dead)
534a15702d8SKuniyuki Iwashima scc_dead = unix_vertex_dead(vertex);
535a15702d8SKuniyuki Iwashima }
536a15702d8SKuniyuki Iwashima
5374090fa37SKuniyuki Iwashima if (scc_dead)
5384090fa37SKuniyuki Iwashima unix_collect_skb(&scc, hitlist);
5391af2dfacSKuniyuki Iwashima else if (!unix_graph_maybe_cyclic)
5401af2dfacSKuniyuki Iwashima unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
5414090fa37SKuniyuki Iwashima
542ad081928SKuniyuki Iwashima list_del(&scc);
543ad081928SKuniyuki Iwashima }
544ad081928SKuniyuki Iwashima
545ad081928SKuniyuki Iwashima list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
5466ba76fd2SKuniyuki Iwashima }
5476ba76fd2SKuniyuki Iwashima
/* Set while a GC run is queued or executing; read locklessly in
 * wait_for_unix_gc(), hence the READ_ONCE()/WRITE_ONCE() accesses.
 */
static bool gc_in_progress;

/* Workqueue callback performing the actual garbage collection. */
static void __unix_gc(struct work_struct *work)
{
	struct sk_buff_head hitlist;
	struct sk_buff *skb;

	spin_lock(&unix_gc_lock);

	/* No edge change since the last walk could have formed a cycle:
	 * nothing to collect.
	 */
	if (!unix_graph_maybe_cyclic) {
		spin_unlock(&unix_gc_lock);
		goto skip_gc;
	}

	__skb_queue_head_init(&hitlist);

	if (unix_graph_grouped)
		unix_walk_scc_fast(&hitlist);
	else
		unix_walk_scc(&hitlist);

	spin_unlock(&unix_gc_lock);

	/* Flag the victims' fd lists so edge teardown during the purge
	 * skips the dead successors (see unix_del_edge()).
	 */
	skb_queue_walk(&hitlist, skb) {
		if (UNIXCB(skb).fp)
			UNIXCB(skb).fp->dead = true;
	}

	/* Purge outside unix_gc_lock; this drops the cyclic references. */
	__skb_queue_purge(&hitlist);
skip_gc:
	WRITE_ONCE(gc_in_progress, false);
}
5808b90a9f8SKuniyuki Iwashima
static DECLARE_WORK(unix_gc_work, __unix_gc);

/* Schedule a garbage collection run; __unix_gc() executes
 * asynchronously on system_unbound_wq and clears gc_in_progress
 * when done.
 */
void unix_gc(void)
{
	WRITE_ONCE(gc_in_progress, true);
	queue_work(system_unbound_wq, &unix_gc_work);
}
5888b90a9f8SKuniyuki Iwashima
/* Force a GC once this many sockets are inflight system-wide. */
#define UNIX_INFLIGHT_TRIGGER_GC 16000
/* Per-user inflight count beyond which the sender must wait for GC. */
#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)

/* Throttle senders of SCM_RIGHTS fds: kick off GC when the global
 * inflight count is excessive, and make users with an unusually large
 * personal inflight count wait for the current GC run to finish.
 * @fpl may be NULL (then only the global trigger applies).
 */
void wait_for_unix_gc(struct scm_fp_list *fpl)
{
	/* If number of inflight sockets is insane,
	 * force a garbage collect right now.
	 *
	 * Paired with the WRITE_ONCE() in unix_inflight(),
	 * unix_notinflight(), and __unix_gc().
	 */
	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
	    !READ_ONCE(gc_in_progress))
		unix_gc();

	/* Penalise users who want to send AF_UNIX sockets
	 * but whose sockets have not been received yet.
	 */
	if (!fpl || !fpl->count_unix ||
	    READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
		return;

	if (READ_ONCE(gc_in_progress))
		flush_work(&unix_gc_work);
}
614