11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * NET3: Garbage Collector For AF_UNIX sockets 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Garbage Collector: 51da177e4SLinus Torvalds * Copyright (C) Barak A. Pearlmutter. 61da177e4SLinus Torvalds * Released under the GPL version 2 or later. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem. 91da177e4SLinus Torvalds * If it doesn't work blame me, it worked when Barak sent it. 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * Assumptions: 121da177e4SLinus Torvalds * 131da177e4SLinus Torvalds * - object w/ a bit 141da177e4SLinus Torvalds * - free list 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * Current optimizations: 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * - explicit stack instead of recursion 191da177e4SLinus Torvalds * - tail recurse on first born instead of immediate push/pop 201da177e4SLinus Torvalds * - we gather the stuff that should not be killed into tree 211da177e4SLinus Torvalds * and stack is just a path from root to the current pointer. 221da177e4SLinus Torvalds * 231da177e4SLinus Torvalds * Future optimizations: 241da177e4SLinus Torvalds * 251da177e4SLinus Torvalds * - don't just push entire root set; process in place 261da177e4SLinus Torvalds * 271da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 281da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 291da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 301da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 311da177e4SLinus Torvalds * 321da177e4SLinus Torvalds * Fixes: 331da177e4SLinus Torvalds * Alan Cox 07 Sept 1997 Vmalloc internal stack as needed. 341da177e4SLinus Torvalds * Cope with changing max_files. 351da177e4SLinus Torvalds * Al Viro 11 Oct 1998 361da177e4SLinus Torvalds * Graph may have cycles. That is, we can send the descriptor 371da177e4SLinus Torvalds * of foo to bar and vice versa. Current code chokes on that. 381da177e4SLinus Torvalds * Fix: move SCM_RIGHTS ones into the separate list and then 391da177e4SLinus Torvalds * skb_free() them all instead of doing explicit fput's. 401da177e4SLinus Torvalds * Another problem: since fput() may block somebody may 411da177e4SLinus Torvalds * create a new unix_socket when we are in the middle of sweep 421da177e4SLinus Torvalds * phase. Fix: revert the logic wrt MARKED. Mark everything 431da177e4SLinus Torvalds * upon the beginning and unmark non-junk ones. 441da177e4SLinus Torvalds * 451da177e4SLinus Torvalds * [12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS 461da177e4SLinus Torvalds * sent to connect()'ed but still not accept()'ed sockets. 471da177e4SLinus Torvalds * Fixed. Old code had slightly different problem here: 481da177e4SLinus Torvalds * extra fput() in situation when we passed the descriptor via 491da177e4SLinus Torvalds * such socket and closed it (descriptor). That would happen on 501da177e4SLinus Torvalds * each unix_gc() until the accept(). Since the struct file in 511da177e4SLinus Torvalds * question would go to the free list and might be reused... 521da177e4SLinus Torvalds * That might be the reason of random oopses on filp_close() 531da177e4SLinus Torvalds * in unrelated processes. 541da177e4SLinus Torvalds * 551da177e4SLinus Torvalds * AV 28 Feb 1999 561da177e4SLinus Torvalds * Kill the explicit allocation of stack. Now we keep the tree 571da177e4SLinus Torvalds * with root in dummy + pointer (gc_current) to one of the nodes. 581da177e4SLinus Torvalds * Stack is represented as path from gc_current to dummy. Unmark 591da177e4SLinus Torvalds * now means "add to tree". Push == "make it a son of gc_current". 601da177e4SLinus Torvalds * Pop == "move gc_current to parent". We keep only pointers to 611da177e4SLinus Torvalds * parents (->gc_tree). 621da177e4SLinus Torvalds * AV 1 Mar 1999 631da177e4SLinus Torvalds * Damn. Added missing check for ->dead in listen queues scanning. 641da177e4SLinus Torvalds * 651da177e4SLinus Torvalds */ 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds #include <linux/kernel.h> 681da177e4SLinus Torvalds #include <linux/sched.h> 691da177e4SLinus Torvalds #include <linux/string.h> 701da177e4SLinus Torvalds #include <linux/socket.h> 711da177e4SLinus Torvalds #include <linux/un.h> 721da177e4SLinus Torvalds #include <linux/net.h> 731da177e4SLinus Torvalds #include <linux/fs.h> 741da177e4SLinus Torvalds #include <linux/slab.h> 751da177e4SLinus Torvalds #include <linux/skbuff.h> 761da177e4SLinus Torvalds #include <linux/netdevice.h> 771da177e4SLinus Torvalds #include <linux/file.h> 781da177e4SLinus Torvalds #include <linux/proc_fs.h> 79*4a3e2f71SArjan van de Ven #include <linux/mutex.h> 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds #include <net/sock.h> 821da177e4SLinus Torvalds #include <net/af_unix.h> 831da177e4SLinus Torvalds #include <net/scm.h> 84c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h> 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds /* Internal data structures and random procedures: */ 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #define GC_HEAD ((struct sock *)(-1)) 891da177e4SLinus Torvalds #define GC_ORPHAN ((struct sock *)(-3)) 901da177e4SLinus Torvalds 911da177e4SLinus Torvalds static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */ 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds atomic_t unix_tot_inflight = ATOMIC_INIT(0); 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds static struct sock *unix_get_socket(struct file *filp) 971da177e4SLinus Torvalds { 981da177e4SLinus Torvalds struct sock *u_sock = NULL; 991da177e4SLinus Torvalds struct inode *inode = filp->f_dentry->d_inode; 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds /* 1021da177e4SLinus Torvalds * Socket ? 1031da177e4SLinus Torvalds */ 1041da177e4SLinus Torvalds if (S_ISSOCK(inode->i_mode)) { 1051da177e4SLinus Torvalds struct socket * sock = SOCKET_I(inode); 1061da177e4SLinus Torvalds struct sock * s = sock->sk; 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds /* 1091da177e4SLinus Torvalds * PF_UNIX ? 1101da177e4SLinus Torvalds */ 1111da177e4SLinus Torvalds if (s && sock->ops && sock->ops->family == PF_UNIX) 1121da177e4SLinus Torvalds u_sock = s; 1131da177e4SLinus Torvalds } 1141da177e4SLinus Torvalds return u_sock; 1151da177e4SLinus Torvalds } 1161da177e4SLinus Torvalds 1171da177e4SLinus Torvalds /* 1181da177e4SLinus Torvalds * Keep the number of times in flight count for the file 1191da177e4SLinus Torvalds * descriptor if it is for an AF_UNIX socket. 1201da177e4SLinus Torvalds */ 1211da177e4SLinus Torvalds 1221da177e4SLinus Torvalds void unix_inflight(struct file *fp) 1231da177e4SLinus Torvalds { 1241da177e4SLinus Torvalds struct sock *s = unix_get_socket(fp); 1251da177e4SLinus Torvalds if(s) { 1261da177e4SLinus Torvalds atomic_inc(&unix_sk(s)->inflight); 1271da177e4SLinus Torvalds atomic_inc(&unix_tot_inflight); 1281da177e4SLinus Torvalds } 1291da177e4SLinus Torvalds } 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds void unix_notinflight(struct file *fp) 1321da177e4SLinus Torvalds { 1331da177e4SLinus Torvalds struct sock *s = unix_get_socket(fp); 1341da177e4SLinus Torvalds if(s) { 1351da177e4SLinus Torvalds atomic_dec(&unix_sk(s)->inflight); 1361da177e4SLinus Torvalds atomic_dec(&unix_tot_inflight); 1371da177e4SLinus Torvalds } 1381da177e4SLinus Torvalds } 1391da177e4SLinus Torvalds 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds /* 1421da177e4SLinus Torvalds * Garbage Collector Support Functions 1431da177e4SLinus Torvalds */ 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds static inline struct sock *pop_stack(void) 1461da177e4SLinus Torvalds { 1471da177e4SLinus Torvalds struct sock *p = gc_current; 1481da177e4SLinus Torvalds gc_current = unix_sk(p)->gc_tree; 1491da177e4SLinus Torvalds return p; 1501da177e4SLinus Torvalds } 1511da177e4SLinus Torvalds 1521da177e4SLinus Torvalds static inline int empty_stack(void) 1531da177e4SLinus Torvalds { 1541da177e4SLinus Torvalds return gc_current == GC_HEAD; 1551da177e4SLinus Torvalds } 1561da177e4SLinus Torvalds 1571da177e4SLinus Torvalds static void maybe_unmark_and_push(struct sock *x) 1581da177e4SLinus Torvalds { 1591da177e4SLinus Torvalds struct unix_sock *u = unix_sk(x); 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds if (u->gc_tree != GC_ORPHAN) 1621da177e4SLinus Torvalds return; 1631da177e4SLinus Torvalds sock_hold(x); 1641da177e4SLinus Torvalds u->gc_tree = gc_current; 1651da177e4SLinus Torvalds gc_current = x; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds 1681da177e4SLinus Torvalds 1691da177e4SLinus Torvalds /* The external entry point: unix_gc() */ 1701da177e4SLinus Torvalds 1711da177e4SLinus Torvalds void unix_gc(void) 1721da177e4SLinus Torvalds { 173*4a3e2f71SArjan van de Ven static DEFINE_MUTEX(unix_gc_sem); 1741da177e4SLinus Torvalds int i; 1751da177e4SLinus Torvalds struct sock *s; 1761da177e4SLinus Torvalds struct sk_buff_head hitlist; 1771da177e4SLinus Torvalds struct sk_buff *skb; 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds /* 1801da177e4SLinus Torvalds * Avoid a recursive GC. 1811da177e4SLinus Torvalds */ 1821da177e4SLinus Torvalds 183*4a3e2f71SArjan van de Ven if (!mutex_trylock(&unix_gc_sem)) 1841da177e4SLinus Torvalds return; 1851da177e4SLinus Torvalds 186fbe9cc4aSDavid S. Miller spin_lock(&unix_table_lock); 1871da177e4SLinus Torvalds 1881da177e4SLinus Torvalds forall_unix_sockets(i, s) 1891da177e4SLinus Torvalds { 1901da177e4SLinus Torvalds unix_sk(s)->gc_tree = GC_ORPHAN; 1911da177e4SLinus Torvalds } 1921da177e4SLinus Torvalds /* 1931da177e4SLinus Torvalds * Everything is now marked 1941da177e4SLinus Torvalds */ 1951da177e4SLinus Torvalds 1961da177e4SLinus Torvalds /* Invariant to be maintained: 1971da177e4SLinus Torvalds - everything unmarked is either: 1981da177e4SLinus Torvalds -- (a) on the stack, or 1991da177e4SLinus Torvalds -- (b) has all of its children unmarked 2001da177e4SLinus Torvalds - everything on the stack is always unmarked 2011da177e4SLinus Torvalds - nothing is ever pushed onto the stack twice, because: 2021da177e4SLinus Torvalds -- nothing previously unmarked is ever pushed on the stack 2031da177e4SLinus Torvalds */ 2041da177e4SLinus Torvalds 2051da177e4SLinus Torvalds /* 2061da177e4SLinus Torvalds * Push root set 2071da177e4SLinus Torvalds */ 2081da177e4SLinus Torvalds 2091da177e4SLinus Torvalds forall_unix_sockets(i, s) 2101da177e4SLinus Torvalds { 2111da177e4SLinus Torvalds int open_count = 0; 2121da177e4SLinus Torvalds 2131da177e4SLinus Torvalds /* 2141da177e4SLinus Torvalds * If all instances of the descriptor are not 2151da177e4SLinus Torvalds * in flight we are in use. 2161da177e4SLinus Torvalds * 2171da177e4SLinus Torvalds * Special case: when socket s is embrion, it may be 2181da177e4SLinus Torvalds * hashed but still not in queue of listening socket. 2191da177e4SLinus Torvalds * In this case (see unix_create1()) we set artificial 2201da177e4SLinus Torvalds * negative inflight counter to close race window. 2211da177e4SLinus Torvalds * It is trick of course and dirty one. 2221da177e4SLinus Torvalds */ 2231da177e4SLinus Torvalds if (s->sk_socket && s->sk_socket->file) 2241da177e4SLinus Torvalds open_count = file_count(s->sk_socket->file); 2251da177e4SLinus Torvalds if (open_count > atomic_read(&unix_sk(s)->inflight)) 2261da177e4SLinus Torvalds maybe_unmark_and_push(s); 2271da177e4SLinus Torvalds } 2281da177e4SLinus Torvalds 2291da177e4SLinus Torvalds /* 2301da177e4SLinus Torvalds * Mark phase 2311da177e4SLinus Torvalds */ 2321da177e4SLinus Torvalds 2331da177e4SLinus Torvalds while (!empty_stack()) 2341da177e4SLinus Torvalds { 2351da177e4SLinus Torvalds struct sock *x = pop_stack(); 2361da177e4SLinus Torvalds struct sock *sk; 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds spin_lock(&x->sk_receive_queue.lock); 2391da177e4SLinus Torvalds skb = skb_peek(&x->sk_receive_queue); 2401da177e4SLinus Torvalds 2411da177e4SLinus Torvalds /* 2421da177e4SLinus Torvalds * Loop through all but first born 2431da177e4SLinus Torvalds */ 2441da177e4SLinus Torvalds 2451da177e4SLinus Torvalds while (skb && skb != (struct sk_buff *)&x->sk_receive_queue) { 2461da177e4SLinus Torvalds /* 2471da177e4SLinus Torvalds * Do we have file descriptors ? 2481da177e4SLinus Torvalds */ 2491da177e4SLinus Torvalds if(UNIXCB(skb).fp) 2501da177e4SLinus Torvalds { 2511da177e4SLinus Torvalds /* 2521da177e4SLinus Torvalds * Process the descriptors of this socket 2531da177e4SLinus Torvalds */ 2541da177e4SLinus Torvalds int nfd=UNIXCB(skb).fp->count; 2551da177e4SLinus Torvalds struct file **fp = UNIXCB(skb).fp->fp; 2561da177e4SLinus Torvalds while(nfd--) 2571da177e4SLinus Torvalds { 2581da177e4SLinus Torvalds /* 2591da177e4SLinus Torvalds * Get the socket the fd matches if 2601da177e4SLinus Torvalds * it indeed does so 2611da177e4SLinus Torvalds */ 2621da177e4SLinus Torvalds if((sk=unix_get_socket(*fp++))!=NULL) 2631da177e4SLinus Torvalds { 2641da177e4SLinus Torvalds maybe_unmark_and_push(sk); 2651da177e4SLinus Torvalds } 2661da177e4SLinus Torvalds } 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds /* We have to scan not-yet-accepted ones too */ 2691da177e4SLinus Torvalds if (x->sk_state == TCP_LISTEN) 2701da177e4SLinus Torvalds maybe_unmark_and_push(skb->sk); 2711da177e4SLinus Torvalds skb=skb->next; 2721da177e4SLinus Torvalds } 2731da177e4SLinus Torvalds spin_unlock(&x->sk_receive_queue.lock); 2741da177e4SLinus Torvalds sock_put(x); 2751da177e4SLinus Torvalds } 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds skb_queue_head_init(&hitlist); 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds forall_unix_sockets(i, s) 2801da177e4SLinus Torvalds { 2811da177e4SLinus Torvalds struct unix_sock *u = unix_sk(s); 2821da177e4SLinus Torvalds 2831da177e4SLinus Torvalds if (u->gc_tree == GC_ORPHAN) { 2841da177e4SLinus Torvalds struct sk_buff *nextsk; 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds spin_lock(&s->sk_receive_queue.lock); 2871da177e4SLinus Torvalds skb = skb_peek(&s->sk_receive_queue); 2881da177e4SLinus Torvalds while (skb && 2891da177e4SLinus Torvalds skb != (struct sk_buff *)&s->sk_receive_queue) { 2901da177e4SLinus Torvalds nextsk = skb->next; 2911da177e4SLinus Torvalds /* 2921da177e4SLinus Torvalds * Do we have file descriptors ? 2931da177e4SLinus Torvalds */ 2948728b834SDavid S. Miller if (UNIXCB(skb).fp) { 2958728b834SDavid S. Miller __skb_unlink(skb, 2968728b834SDavid S. Miller &s->sk_receive_queue); 2971da177e4SLinus Torvalds __skb_queue_tail(&hitlist, skb); 2981da177e4SLinus Torvalds } 2991da177e4SLinus Torvalds skb = nextsk; 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds spin_unlock(&s->sk_receive_queue.lock); 3021da177e4SLinus Torvalds } 3031da177e4SLinus Torvalds u->gc_tree = GC_ORPHAN; 3041da177e4SLinus Torvalds } 305fbe9cc4aSDavid S. Miller spin_unlock(&unix_table_lock); 3061da177e4SLinus Torvalds 3071da177e4SLinus Torvalds /* 3081da177e4SLinus Torvalds * Here we are. Hitlist is filled. Die. 3091da177e4SLinus Torvalds */ 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds __skb_queue_purge(&hitlist); 312*4a3e2f71SArjan van de Ven mutex_unlock(&unix_gc_sem); 3131da177e4SLinus Torvalds } 314