17eb95156SPavel Emelyanov /* 27eb95156SPavel Emelyanov * inet fragments management 37eb95156SPavel Emelyanov * 47eb95156SPavel Emelyanov * This program is free software; you can redistribute it and/or 57eb95156SPavel Emelyanov * modify it under the terms of the GNU General Public License 67eb95156SPavel Emelyanov * as published by the Free Software Foundation; either version 77eb95156SPavel Emelyanov * 2 of the License, or (at your option) any later version. 87eb95156SPavel Emelyanov * 97eb95156SPavel Emelyanov * Authors: Pavel Emelyanov <xemul@openvz.org> 107eb95156SPavel Emelyanov * Started as consolidation of ipv4/ip_fragment.c, 117eb95156SPavel Emelyanov * ipv6/reassembly. and ipv6 nf conntrack reassembly 127eb95156SPavel Emelyanov */ 137eb95156SPavel Emelyanov 147eb95156SPavel Emelyanov #include <linux/list.h> 157eb95156SPavel Emelyanov #include <linux/spinlock.h> 167eb95156SPavel Emelyanov #include <linux/module.h> 177eb95156SPavel Emelyanov #include <linux/timer.h> 187eb95156SPavel Emelyanov #include <linux/mm.h> 19321a3a99SPavel Emelyanov #include <linux/random.h> 201e4b8287SPavel Emelyanov #include <linux/skbuff.h> 211e4b8287SPavel Emelyanov #include <linux/rtnetlink.h> 227eb95156SPavel Emelyanov 237eb95156SPavel Emelyanov #include <net/inet_frag.h> 247eb95156SPavel Emelyanov 25321a3a99SPavel Emelyanov static void inet_frag_secret_rebuild(unsigned long dummy) 26321a3a99SPavel Emelyanov { 27321a3a99SPavel Emelyanov struct inet_frags *f = (struct inet_frags *)dummy; 28321a3a99SPavel Emelyanov unsigned long now = jiffies; 29321a3a99SPavel Emelyanov int i; 30321a3a99SPavel Emelyanov 31321a3a99SPavel Emelyanov write_lock(&f->lock); 32321a3a99SPavel Emelyanov get_random_bytes(&f->rnd, sizeof(u32)); 33321a3a99SPavel Emelyanov for (i = 0; i < INETFRAGS_HASHSZ; i++) { 34321a3a99SPavel Emelyanov struct inet_frag_queue *q; 35321a3a99SPavel Emelyanov struct hlist_node *p, *n; 36321a3a99SPavel Emelyanov 37321a3a99SPavel Emelyanov hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { 38321a3a99SPavel Emelyanov unsigned int hval = f->hashfn(q); 39321a3a99SPavel Emelyanov 40321a3a99SPavel Emelyanov if (hval != i) { 41321a3a99SPavel Emelyanov hlist_del(&q->list); 42321a3a99SPavel Emelyanov 43321a3a99SPavel Emelyanov /* Relink to new hash chain. */ 44321a3a99SPavel Emelyanov hlist_add_head(&q->list, &f->hash[hval]); 45321a3a99SPavel Emelyanov } 46321a3a99SPavel Emelyanov } 47321a3a99SPavel Emelyanov } 48321a3a99SPavel Emelyanov write_unlock(&f->lock); 49321a3a99SPavel Emelyanov 50321a3a99SPavel Emelyanov mod_timer(&f->secret_timer, now + f->ctl->secret_interval); 51321a3a99SPavel Emelyanov } 52321a3a99SPavel Emelyanov 537eb95156SPavel Emelyanov void inet_frags_init(struct inet_frags *f) 547eb95156SPavel Emelyanov { 557eb95156SPavel Emelyanov int i; 567eb95156SPavel Emelyanov 577eb95156SPavel Emelyanov for (i = 0; i < INETFRAGS_HASHSZ; i++) 587eb95156SPavel Emelyanov INIT_HLIST_HEAD(&f->hash[i]); 597eb95156SPavel Emelyanov 607eb95156SPavel Emelyanov INIT_LIST_HEAD(&f->lru_list); 617eb95156SPavel Emelyanov rwlock_init(&f->lock); 627eb95156SPavel Emelyanov 637eb95156SPavel Emelyanov f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 647eb95156SPavel Emelyanov (jiffies ^ (jiffies >> 6))); 657eb95156SPavel Emelyanov 667eb95156SPavel Emelyanov f->nqueues = 0; 677eb95156SPavel Emelyanov atomic_set(&f->mem, 0); 687eb95156SPavel Emelyanov 69321a3a99SPavel Emelyanov init_timer(&f->secret_timer); 70321a3a99SPavel Emelyanov f->secret_timer.function = inet_frag_secret_rebuild; 71321a3a99SPavel Emelyanov f->secret_timer.data = (unsigned long)f; 72321a3a99SPavel Emelyanov f->secret_timer.expires = jiffies + f->ctl->secret_interval; 73321a3a99SPavel Emelyanov add_timer(&f->secret_timer); 747eb95156SPavel Emelyanov } 757eb95156SPavel Emelyanov EXPORT_SYMBOL(inet_frags_init); 767eb95156SPavel Emelyanov 777eb95156SPavel Emelyanov void inet_frags_fini(struct inet_frags *f) 787eb95156SPavel Emelyanov { 79321a3a99SPavel Emelyanov del_timer(&f->secret_timer); 807eb95156SPavel Emelyanov } 817eb95156SPavel Emelyanov EXPORT_SYMBOL(inet_frags_fini); 82277e650dSPavel Emelyanov 83277e650dSPavel Emelyanov static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 84277e650dSPavel Emelyanov { 85277e650dSPavel Emelyanov write_lock(&f->lock); 86277e650dSPavel Emelyanov hlist_del(&fq->list); 87277e650dSPavel Emelyanov list_del(&fq->lru_list); 88277e650dSPavel Emelyanov f->nqueues--; 89277e650dSPavel Emelyanov write_unlock(&f->lock); 90277e650dSPavel Emelyanov } 91277e650dSPavel Emelyanov 92277e650dSPavel Emelyanov void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) 93277e650dSPavel Emelyanov { 94277e650dSPavel Emelyanov if (del_timer(&fq->timer)) 95277e650dSPavel Emelyanov atomic_dec(&fq->refcnt); 96277e650dSPavel Emelyanov 97277e650dSPavel Emelyanov if (!(fq->last_in & COMPLETE)) { 98277e650dSPavel Emelyanov fq_unlink(fq, f); 99277e650dSPavel Emelyanov atomic_dec(&fq->refcnt); 100277e650dSPavel Emelyanov fq->last_in |= COMPLETE; 101277e650dSPavel Emelyanov } 102277e650dSPavel Emelyanov } 103277e650dSPavel Emelyanov 104277e650dSPavel Emelyanov EXPORT_SYMBOL(inet_frag_kill); 1051e4b8287SPavel Emelyanov 1061e4b8287SPavel Emelyanov static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb, 1071e4b8287SPavel Emelyanov int *work) 1081e4b8287SPavel Emelyanov { 1091e4b8287SPavel Emelyanov if (work) 1101e4b8287SPavel Emelyanov *work -= skb->truesize; 1111e4b8287SPavel Emelyanov 1121e4b8287SPavel Emelyanov atomic_sub(skb->truesize, &f->mem); 1131e4b8287SPavel Emelyanov if (f->skb_free) 1141e4b8287SPavel Emelyanov f->skb_free(skb); 1151e4b8287SPavel Emelyanov kfree_skb(skb); 1161e4b8287SPavel Emelyanov } 1171e4b8287SPavel Emelyanov 1181e4b8287SPavel Emelyanov void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, 1191e4b8287SPavel Emelyanov int *work) 1201e4b8287SPavel Emelyanov { 1211e4b8287SPavel Emelyanov struct sk_buff *fp; 1221e4b8287SPavel Emelyanov 1231e4b8287SPavel Emelyanov BUG_TRAP(q->last_in & COMPLETE); 1241e4b8287SPavel Emelyanov BUG_TRAP(del_timer(&q->timer) == 0); 1251e4b8287SPavel Emelyanov 1261e4b8287SPavel Emelyanov /* Release all fragment data. */ 1271e4b8287SPavel Emelyanov fp = q->fragments; 1281e4b8287SPavel Emelyanov while (fp) { 1291e4b8287SPavel Emelyanov struct sk_buff *xp = fp->next; 1301e4b8287SPavel Emelyanov 1311e4b8287SPavel Emelyanov frag_kfree_skb(f, fp, work); 1321e4b8287SPavel Emelyanov fp = xp; 1331e4b8287SPavel Emelyanov } 1341e4b8287SPavel Emelyanov 1351e4b8287SPavel Emelyanov if (work) 1361e4b8287SPavel Emelyanov *work -= f->qsize; 1371e4b8287SPavel Emelyanov atomic_sub(f->qsize, &f->mem); 1381e4b8287SPavel Emelyanov 1391e4b8287SPavel Emelyanov f->destructor(q); 1401e4b8287SPavel Emelyanov 1411e4b8287SPavel Emelyanov } 1421e4b8287SPavel Emelyanov EXPORT_SYMBOL(inet_frag_destroy); 1438e7999c4SPavel Emelyanov 1448e7999c4SPavel Emelyanov int inet_frag_evictor(struct inet_frags *f) 1458e7999c4SPavel Emelyanov { 1468e7999c4SPavel Emelyanov struct inet_frag_queue *q; 1478e7999c4SPavel Emelyanov int work, evicted = 0; 1488e7999c4SPavel Emelyanov 1498e7999c4SPavel Emelyanov work = atomic_read(&f->mem) - f->ctl->low_thresh; 1508e7999c4SPavel Emelyanov while (work > 0) { 1518e7999c4SPavel Emelyanov read_lock(&f->lock); 1528e7999c4SPavel Emelyanov if (list_empty(&f->lru_list)) { 1538e7999c4SPavel Emelyanov read_unlock(&f->lock); 1548e7999c4SPavel Emelyanov break; 1558e7999c4SPavel Emelyanov } 1568e7999c4SPavel Emelyanov 1578e7999c4SPavel Emelyanov q = list_first_entry(&f->lru_list, 1588e7999c4SPavel Emelyanov struct inet_frag_queue, lru_list); 1598e7999c4SPavel Emelyanov atomic_inc(&q->refcnt); 1608e7999c4SPavel Emelyanov read_unlock(&f->lock); 1618e7999c4SPavel Emelyanov 1628e7999c4SPavel Emelyanov spin_lock(&q->lock); 1638e7999c4SPavel Emelyanov if (!(q->last_in & COMPLETE)) 1648e7999c4SPavel Emelyanov inet_frag_kill(q, f); 1658e7999c4SPavel Emelyanov spin_unlock(&q->lock); 1668e7999c4SPavel Emelyanov 1678e7999c4SPavel Emelyanov if (atomic_dec_and_test(&q->refcnt)) 1688e7999c4SPavel Emelyanov inet_frag_destroy(q, f, &work); 1698e7999c4SPavel Emelyanov evicted++; 1708e7999c4SPavel Emelyanov } 1718e7999c4SPavel Emelyanov 1728e7999c4SPavel Emelyanov return evicted; 1738e7999c4SPavel Emelyanov } 1748e7999c4SPavel Emelyanov EXPORT_SYMBOL(inet_frag_evictor); 175*2588fe1dSPavel Emelyanov 176*2588fe1dSPavel Emelyanov struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, 177*2588fe1dSPavel Emelyanov struct inet_frags *f, unsigned int hash) 178*2588fe1dSPavel Emelyanov { 179*2588fe1dSPavel Emelyanov struct inet_frag_queue *qp; 180*2588fe1dSPavel Emelyanov #ifdef CONFIG_SMP 181*2588fe1dSPavel Emelyanov struct hlist_node *n; 182*2588fe1dSPavel Emelyanov #endif 183*2588fe1dSPavel Emelyanov 184*2588fe1dSPavel Emelyanov write_lock(&f->lock); 185*2588fe1dSPavel Emelyanov #ifdef CONFIG_SMP 186*2588fe1dSPavel Emelyanov /* With SMP race we have to recheck hash table, because 187*2588fe1dSPavel Emelyanov * such entry could be created on other cpu, while we 188*2588fe1dSPavel Emelyanov * promoted read lock to write lock. 189*2588fe1dSPavel Emelyanov */ 190*2588fe1dSPavel Emelyanov hlist_for_each_entry(qp, n, &f->hash[hash], list) { 191*2588fe1dSPavel Emelyanov if (f->equal(qp, qp_in)) { 192*2588fe1dSPavel Emelyanov atomic_inc(&qp->refcnt); 193*2588fe1dSPavel Emelyanov write_unlock(&f->lock); 194*2588fe1dSPavel Emelyanov qp_in->last_in |= COMPLETE; 195*2588fe1dSPavel Emelyanov inet_frag_put(qp_in, f); 196*2588fe1dSPavel Emelyanov return qp; 197*2588fe1dSPavel Emelyanov } 198*2588fe1dSPavel Emelyanov } 199*2588fe1dSPavel Emelyanov #endif 200*2588fe1dSPavel Emelyanov qp = qp_in; 201*2588fe1dSPavel Emelyanov if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout)) 202*2588fe1dSPavel Emelyanov atomic_inc(&qp->refcnt); 203*2588fe1dSPavel Emelyanov 204*2588fe1dSPavel Emelyanov atomic_inc(&qp->refcnt); 205*2588fe1dSPavel Emelyanov hlist_add_head(&qp->list, &f->hash[hash]); 206*2588fe1dSPavel Emelyanov list_add_tail(&qp->lru_list, &f->lru_list); 207*2588fe1dSPavel Emelyanov f->nqueues++; 208*2588fe1dSPavel Emelyanov write_unlock(&f->lock); 209*2588fe1dSPavel Emelyanov return qp; 210*2588fe1dSPavel Emelyanov } 211*2588fe1dSPavel Emelyanov EXPORT_SYMBOL(inet_frag_intern); 212