// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET3:	Garbage Collector For AF_UNIX sockets
 *
 * Garbage Collector:
 *	Copyright (C) Barak A. Pearlmutter.
 *
 * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
 * If it doesn't work blame me, it worked when Barak sent it.
 *
 * Assumptions:
 *
 *  - object w/ a bit
 *  - free list
 *
 * Current optimizations:
 *
 *  - explicit stack instead of recursion
 *  - tail recurse on first born instead of immediate push/pop
 *  - we gather the stuff that should not be killed into tree
 *    and stack is just a path from root to the current pointer.
 *
 *  Future optimizations:
 *
 *  - don't just push entire root set; process in place
 *
 *  Fixes:
 *	Alan Cox	07 Sept 1997	Vmalloc internal stack as needed.
 *					Cope with changing max_files.
 *	Al Viro		11 Oct 1998
 *		Graph may have cycles. That is, we can send the descriptor
 *		of foo to bar and vice versa. Current code chokes on that.
 *		Fix: move SCM_RIGHTS ones into the separate list and then
 *		skb_free() them all instead of doing explicit fput's.
 *		Another problem: since fput() may block, somebody may
 *		create a new unix_socket while we are in the middle of the
 *		sweep phase. Fix: revert the logic wrt MARKED. Mark everything
 *		upon the beginning and unmark non-junk ones.
 *
 *		[12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
 *		sent to connect()'ed but still not accept()'ed sockets.
 *		Fixed. Old code had a slightly different problem here:
 *		an extra fput() when we passed the descriptor via such a
 *		socket and then closed it (the descriptor). That would happen
 *		on each unix_gc() until the accept(). Since the struct file in
 *		question would go to the free list and might be reused...
 *		That might be the reason for random oopses on filp_close()
 *		in unrelated processes.
 *
 *	AV		28 Feb 1999
 *		Kill the explicit allocation of stack. Now we keep the tree
 *		with root in dummy + pointer (gc_current) to one of the nodes.
 *		Stack is represented as path from gc_current to dummy. Unmark
 *		now means "add to tree". Push == "make it a son of gc_current".
 *		Pop == "move gc_current to parent". We keep only pointers to
 *		parents (->gc_tree).
 *	AV		1 Mar 1999
 *		Damn. Added missing check for ->dead in listen queues
 *		scanning.
 *
 *	Miklos Szeredi 25 Jun 2007
 *		Reimplement with a cycle collecting algorithm. This should
 *		solve several problems with the previous code, like being
 *		racy wrt receive and holding up unrelated socket operations.
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/wait.h>

#include <net/sock.h>
#include <net/af_unix.h>
#include <net/scm.h>
#include <net/tcp_states.h>

struct unix_sock *unix_get_socket(struct file *filp)
{
	struct inode *inode = file_inode(filp);

	/* Socket ? */
	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
		struct socket *sock = SOCKET_I(inode);
		const struct proto_ops *ops;
		struct sock *sk = sock->sk;

		ops = READ_ONCE(sock->ops);

		/* PF_UNIX ? */
		if (sk && ops && ops->family == PF_UNIX)
			return unix_sk(sk);
	}

	return NULL;
}
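
/* The vertex/edge machinery below models in-flight AF_UNIX sockets as a
 * directed graph: a vertex is an inflight socket, and an edge from A to B
 * means an fd of A sits in an SCM_RIGHTS message queued on B.
 *
 * A reference cycle that only this GC can reclaim is easy to create from
 * user space; a minimal sketch (illustrative only, not kernel code):
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_DGRAM, 0, sv);
 *	(send sv[0] into sv[1]'s queue via SCM_RIGHTS, and vice versa)
 *	close(sv[0]);
 *	close(sv[1]);
 *
 * Each socket is now referenced only by the skb queued on its peer, so
 * plain refcounting will never free either of them.
 */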
static LIST_HEAD(unix_unvisited_vertices);

enum unix_vertex_index {
	UNIX_VERTEX_INDEX_UNVISITED,
	UNIX_VERTEX_INDEX_START,
};

static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
	struct unix_vertex *vertex = edge->predecessor->vertex;

	if (!vertex) {
		vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
		vertex->out_degree = 0;
		INIT_LIST_HEAD(&vertex->edges);

		list_move_tail(&vertex->entry, &unix_unvisited_vertices);
		edge->predecessor->vertex = vertex;
	}

	vertex->out_degree++;
	list_add_tail(&edge->vertex_entry, &vertex->edges);
}

static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
	struct unix_vertex *vertex = edge->predecessor->vertex;

	list_del(&edge->vertex_entry);
	vertex->out_degree--;

	if (!vertex->out_degree) {
		edge->predecessor->vertex = NULL;
		list_move_tail(&vertex->entry, &fpl->vertices);
	}
}

static void unix_free_vertices(struct scm_fp_list *fpl)
{
	struct unix_vertex *vertex, *next_vertex;

	list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
		list_del(&vertex->entry);
		kfree(vertex);
	}
}
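
/* The functions below tie the graph to the life cycle of an SCM_RIGHTS
 * message (struct scm_fp_list): unix_prepare_fpl() preallocates the
 * vertices and the edge array on the send path, unix_add_edges() publishes
 * them into the global graph once the message is queued, unix_del_edges()
 * unlinks them when the message is received, and unix_destroy_fpl() cleans
 * up a message that was never delivered.
 */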
DEFINE_SPINLOCK(unix_gc_lock);
unsigned int unix_tot_inflight;

void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
	int i = 0, j = 0;

	spin_lock(&unix_gc_lock);

	if (!fpl->count_unix)
		goto out;

	do {
		struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
		struct unix_edge *edge;

		if (!inflight)
			continue;

		edge = fpl->edges + i++;
		edge->predecessor = inflight;
		edge->successor = receiver;

		unix_add_edge(fpl, edge);
	} while (i < fpl->count_unix);

	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
out:
	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);

	spin_unlock(&unix_gc_lock);

	fpl->inflight = true;

	unix_free_vertices(fpl);
}

void unix_del_edges(struct scm_fp_list *fpl)
{
	int i = 0;

	spin_lock(&unix_gc_lock);

	if (!fpl->count_unix)
		goto out;

	do {
		struct unix_edge *edge = fpl->edges + i++;

		unix_del_edge(fpl, edge);
	} while (i < fpl->count_unix);

	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
out:
	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);

	spin_unlock(&unix_gc_lock);

	fpl->inflight = false;
}

int unix_prepare_fpl(struct scm_fp_list *fpl)
{
	struct unix_vertex *vertex;
	int i;

	if (!fpl->count_unix)
		return 0;

	for (i = 0; i < fpl->count_unix; i++) {
		vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
		if (!vertex)
			goto err;

		list_add(&vertex->entry, &fpl->vertices);
	}

	fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
				    GFP_KERNEL_ACCOUNT);
	if (!fpl->edges)
		goto err;

	return 0;

err:
	unix_free_vertices(fpl);
	return -ENOMEM;
}
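
/* A plausible send-path call sequence (the exact call sites are in
 * af_unix.c and are stated here as an assumption, not a guarantee):
 *
 *	if (unix_prepare_fpl(fpl))		// preallocate graph memory
 *		return -ENOMEM;
 *	...queue the skb on the receiver...
 *	unix_add_edges(fpl, receiver);		// publish the edges
 *
 * unix_destroy_fpl() below is the teardown path: it unlinks the edges if
 * they were published and frees whatever was preallocated.
 */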
void unix_destroy_fpl(struct scm_fp_list *fpl)
{
	if (fpl->inflight)
		unix_del_edges(fpl);

	kvfree(fpl->edges);
	unix_free_vertices(fpl);
}

static LIST_HEAD(unix_visited_vertices);

static void __unix_walk_scc(struct unix_vertex *vertex)
{
	unsigned long index = UNIX_VERTEX_INDEX_START;
	struct unix_edge *edge;
	LIST_HEAD(edge_stack);

next_vertex:
	vertex->index = index;
	index++;

	/* Explore neighbour vertices (receivers of the current vertex's fd). */
	list_for_each_entry(edge, &vertex->edges, vertex_entry) {
		struct unix_vertex *next_vertex = edge->successor->vertex;

		if (!next_vertex)
			continue;

		if (next_vertex->index == UNIX_VERTEX_INDEX_UNVISITED) {
			/* Iterative (non-recursive) depth first search
			 *
			 *   1. Push a forward edge to edge_stack and set
			 *      the successor to vertex for the next iteration.
			 */
			list_add(&edge->stack_entry, &edge_stack);

			vertex = next_vertex;
			goto next_vertex;

			/*   2. Pop the edge directed to the current vertex
			 *      and restore the ancestor for backtracking.
			 */
prev_vertex:
			edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
			list_del_init(&edge->stack_entry);

			vertex = edge->predecessor->vertex;
		}
	}

	/* Don't restart DFS from this vertex in unix_walk_scc(). */
	list_move_tail(&vertex->entry, &unix_visited_vertices);

	/* Need backtracking ? */
	if (!list_empty(&edge_stack))
		goto prev_vertex;
}
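
/* unix_walk_scc() below restarts the DFS from every still-unvisited
 * vertex.  __unix_walk_scc() itself avoids recursion: edge_stack is an
 * explicit call stack, the push plus "goto next_vertex" stands in for a
 * recursive call, and the pop at prev_vertex stands in for its return,
 * so kernel stack usage stays constant regardless of graph depth.
 */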
static void unix_walk_scc(void)
{
	struct unix_vertex *vertex;

	list_for_each_entry(vertex, &unix_unvisited_vertices, entry)
		vertex->index = UNIX_VERTEX_INDEX_UNVISITED;

	/* Visit every vertex exactly once.
	 * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
	 */
	while (!list_empty(&unix_unvisited_vertices)) {
		vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
		__unix_walk_scc(vertex);
	}

	list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
}

static LIST_HEAD(gc_candidates);
static LIST_HEAD(gc_inflight_list);

/* Keep track of how many times a file descriptor is in flight, if it
 * refers to an AF_UNIX socket.
 */
void unix_inflight(struct user_struct *user, struct file *filp)
{
	struct unix_sock *u = unix_get_socket(filp);

	spin_lock(&unix_gc_lock);

	if (u) {
		if (!u->inflight) {
			WARN_ON_ONCE(!list_empty(&u->link));
			list_add_tail(&u->link, &gc_inflight_list);
		} else {
			WARN_ON_ONCE(list_empty(&u->link));
		}
		u->inflight++;
	}

	spin_unlock(&unix_gc_lock);
}

void unix_notinflight(struct user_struct *user, struct file *filp)
{
	struct unix_sock *u = unix_get_socket(filp);

	spin_lock(&unix_gc_lock);

	if (u) {
		WARN_ON_ONCE(!u->inflight);
		WARN_ON_ONCE(list_empty(&u->link));

		u->inflight--;
		if (!u->inflight)
			list_del_init(&u->link);
	}

	spin_unlock(&unix_gc_lock);
}
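
/* Everything below this point is the pre-existing mark-and-sweep style
 * collector (see the 2007 entry in the header comment).  __unix_gc() runs
 * unix_walk_scc() first, but the scan_*() machinery below still performs
 * the actual selection and purging of garbage.
 */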
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
			  struct sk_buff_head *hitlist)
{
	struct sk_buff *skb;
	struct sk_buff *next;

	spin_lock(&x->sk_receive_queue.lock);
	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
		/* Do we have file descriptors ? */
		if (UNIXCB(skb).fp) {
			bool hit = false;
			/* Process the descriptors of this socket */
			int nfd = UNIXCB(skb).fp->count;
			struct file **fp = UNIXCB(skb).fp->fp;

			while (nfd--) {
				/* Look up the socket this fd refers to, if any */
				struct unix_sock *u = unix_get_socket(*fp++);

				/* Ignore non-candidates; they could have been
				 * added to the queues after the garbage
				 * collection started.
				 */
				if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
					hit = true;

					func(u);
				}
			}
			if (hit && hitlist != NULL) {
				__skb_unlink(skb, &x->sk_receive_queue);
				__skb_queue_tail(hitlist, skb);
			}
		}
	}
	spin_unlock(&x->sk_receive_queue.lock);
}

static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
			  struct sk_buff_head *hitlist)
{
	if (x->sk_state != TCP_LISTEN) {
		scan_inflight(x, func, hitlist);
	} else {
		struct sk_buff *skb;
		struct sk_buff *next;
		struct unix_sock *u;
		LIST_HEAD(embryos);

		/* For a listening socket collect the queued embryos
		 * and perform a scan on them as well.
		 */
		spin_lock(&x->sk_receive_queue.lock);
		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
			u = unix_sk(skb->sk);

			/* An embryo cannot be in-flight, so it's safe
			 * to use the list link.
			 */
			WARN_ON_ONCE(!list_empty(&u->link));
			list_add_tail(&u->link, &embryos);
		}
		spin_unlock(&x->sk_receive_queue.lock);

		while (!list_empty(&embryos)) {
			u = list_entry(embryos.next, struct unix_sock, link);
			scan_inflight(&u->sk, func, hitlist);
			list_del_init(&u->link);
		}
	}
}
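
/* The three helpers below are the callbacks passed to scan_children() by
 * __unix_gc().  They run with unix_gc_lock held, which is what makes the
 * plain, non-atomic updates of u->inflight safe.
 */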
static void dec_inflight(struct unix_sock *usk)
{
	usk->inflight--;
}

static void inc_inflight(struct unix_sock *usk)
{
	usk->inflight++;
}

static void inc_inflight_move_tail(struct unix_sock *u)
{
	u->inflight++;

	/* If this still might be part of a cycle, move it to the end
	 * of the list, so that it's checked even if it was already
	 * passed over.
	 */
	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
		list_move_tail(&u->link, &gc_candidates);
}

static bool gc_in_progress;

static void __unix_gc(struct work_struct *work)
{
	struct sk_buff_head hitlist;
	struct unix_sock *u, *next;
	LIST_HEAD(not_cycle_list);
	struct list_head cursor;

	spin_lock(&unix_gc_lock);

	unix_walk_scc();

	/* First, select candidates for garbage collection.  Only
	 * in-flight sockets are considered, and from those only the ones
	 * which don't have any external reference.
	 *
	 * Holding unix_gc_lock will protect these candidates from
	 * being detached, and hence from gaining an external
	 * reference.  Since there are no possible receivers, all
	 * buffers currently on the candidates' queues stay there
	 * during the garbage collection.
	 *
	 * We also know that no new candidate can be added onto the
	 * receive queues.  Other, non-candidate sockets _can_ be
	 * added to a queue, so we must make sure only to touch
	 * candidates.
	 */
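	/* In other words: file_count() is the total reference count of the
	 * socket's file, and u->inflight is how many of those references
	 * come from SCM_RIGHTS messages still sitting in receive queues.
	 * When the two are equal, no user-space fd refers to the socket
	 * any more.
	 */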
	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
		long total_refs;

		total_refs = file_count(u->sk.sk_socket->file);

		WARN_ON_ONCE(!u->inflight);
		WARN_ON_ONCE(total_refs < u->inflight);
		if (total_refs == u->inflight) {
			list_move_tail(&u->link, &gc_candidates);
			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
		}
	}

	/* Now remove all internal in-flight references to children of
	 * the candidates.
	 */
	list_for_each_entry(u, &gc_candidates, link)
		scan_children(&u->sk, dec_inflight, NULL);

	/* Restore the references for children of all candidates which
	 * have remaining references.  Do this recursively, so that only
	 * those which form cyclic references remain.
	 *
	 * Use a "cursor" link to make the list traversal safe, even
	 * though elements might be moved about.
	 */
	list_add(&cursor, &gc_candidates);
	while (cursor.next != &gc_candidates) {
		u = list_entry(cursor.next, struct unix_sock, link);

		/* Move cursor to after the current position. */
		list_move(&cursor, &u->link);

		if (u->inflight) {
			list_move_tail(&u->link, &not_cycle_list);
			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
			scan_children(&u->sk, inc_inflight_move_tail, NULL);
		}
	}
	list_del(&cursor);

	/* Now gc_candidates contains only garbage.  Restore the original
	 * inflight counters for these as well, and remove the skbuffs
	 * which are creating the cycle(s).
	 */
	skb_queue_head_init(&hitlist);
	list_for_each_entry(u, &gc_candidates, link) {
		scan_children(&u->sk, inc_inflight, &hitlist);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (u->oob_skb) {
			kfree_skb(u->oob_skb);
			u->oob_skb = NULL;
		}
#endif
	}
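
	/* u->oob_skb holds an extra reference to an skb queued for MSG_OOB;
	 * it is dropped above, presumably so that purging the hitlist below
	 * can free the last references in the cycle.
	 */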

	/* not_cycle_list contains those sockets which do not make up a
	 * cycle.  Restore these to the inflight list.
	 */
	while (!list_empty(&not_cycle_list)) {
		u = list_entry(not_cycle_list.next, struct unix_sock, link);
		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
		list_move_tail(&u->link, &gc_inflight_list);
	}

	spin_unlock(&unix_gc_lock);

	/* Here we are. Hitlist is filled. Die. */
	__skb_queue_purge(&hitlist);

	spin_lock(&unix_gc_lock);

	/* All candidates should have been detached by now. */
	WARN_ON_ONCE(!list_empty(&gc_candidates));

	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
	WRITE_ONCE(gc_in_progress, false);

	spin_unlock(&unix_gc_lock);
}

static DECLARE_WORK(unix_gc_work, __unix_gc);

void unix_gc(void)
{
	WRITE_ONCE(gc_in_progress, true);
	queue_work(system_unbound_wq, &unix_gc_work);
}

#define UNIX_INFLIGHT_TRIGGER_GC 16000
#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)

void wait_for_unix_gc(struct scm_fp_list *fpl)
{
	/* If number of inflight sockets is insane,
	 * force a garbage collect right now.
	 *
	 * Paired with the WRITE_ONCE() in unix_inflight(),
	 * unix_notinflight(), and __unix_gc().
	 */
	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
	    !READ_ONCE(gc_in_progress))
		unix_gc();

	/* Penalise users who want to send AF_UNIX sockets
	 * but whose sockets have not been received yet.
	 */
	if (!fpl || !fpl->count_unix ||
	    READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
		return;

	if (READ_ONCE(gc_in_progress))
		flush_work(&unix_gc_work);
}