/*
 *	inet fragments management
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Authors:	Pavel Emelyanov <xemul@openvz.org>
 *			Started as consolidation of ipv4/ip_fragment.c,
 *			ipv6/reassembly.c and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

#include <net/inet_frag.h>

static void inet_frag_secret_rebuild(unsigned long dummy)
{
	struct inet_frags *f = (struct inet_frags *)dummy;
	unsigned long now = jiffies;
	int i;

	write_lock(&f->lock);
	get_random_bytes(&f->rnd, sizeof(u32));
	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
		struct inet_frag_queue *q;
		struct hlist_node *p, *n;

		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
			unsigned int hval = f->hashfn(q);

			if (hval != i) {
				hlist_del(&q->list);

				/* Relink to new hash chain. */
				hlist_add_head(&q->list, &f->hash[hval]);
			}
		}
	}
	write_unlock(&f->lock);

	mod_timer(&f->secret_timer, now + f->secret_interval);
}
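
/*
 * The rebuild above only redistributes queues if each protocol's
 * ->hashfn mixes f->rnd into the hash value. A minimal sketch of such a
 * hash, modelled on ipv4's ip_fragment.c (ipqhashfn and ip4_frags live
 * there, not in this file):
 *
 *	static unsigned int ipqhashfn(__be16 id, __be32 saddr,
 *				      __be32 daddr, u8 prot)
 *	{
 *		return jhash_3words((__force u32)id << 16 | prot,
 *				    (__force u32)saddr, (__force u32)daddr,
 *				    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 *	}
 */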

void inet_frags_init(struct inet_frags *f)
{
	int i;

	for (i = 0; i < INETFRAGS_HASHSZ; i++)
		INIT_HLIST_HEAD(&f->hash[i]);

	rwlock_init(&f->lock);

	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
		   (jiffies ^ (jiffies >> 6)));

	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
			(unsigned long)f);
	f->secret_timer.expires = jiffies + f->secret_interval;
	add_timer(&f->secret_timer);
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_init_net(struct netns_frags *nf)
{
	nf->nqueues = 0;
	init_frag_mem_limit(nf);
	INIT_LIST_HEAD(&nf->lru_list);
}
EXPORT_SYMBOL(inet_frags_init_net);

void inet_frags_fini(struct inet_frags *f)
{
	del_timer(&f->secret_timer);
}
EXPORT_SYMBOL(inet_frags_fini);
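
/*
 * Sketch of how a protocol wires itself up to this library, trimmed
 * from ipv4's ipfrag_init() (the ip4_* names belong to that caller and
 * are shown here only for illustration):
 *
 *	static struct inet_frags ip4_frags;
 *
 *	void __init ipfrag_init(void)
 *	{
 *		ip4_frags.hashfn = ip4_hashfn;
 *		ip4_frags.constructor = ip4_frag_init;
 *		ip4_frags.destructor = ip4_frag_free;
 *		ip4_frags.skb_free = NULL;
 *		ip4_frags.qsize = sizeof(struct ipq);
 *		ip4_frags.match = ip4_frag_match;
 *		ip4_frags.frag_expire = ip_expire;
 *		ip4_frags.secret_interval = 10 * 60 * HZ;
 *		inet_frags_init(&ip4_frags);
 *	}
 *
 * Each network namespace additionally calls inet_frags_init_net() on
 * its own struct netns_frags (e.g. &net->ipv4.frags).
 */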

void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
	nf->low_thresh = 0;

	local_bh_disable();
	inet_frag_evictor(nf, f, true);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_frags_exit_net);

static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
	write_lock(&f->lock);
	hlist_del(&fq->list);
	list_del(&fq->lru_list);
	fq->net->nqueues--;
	write_unlock(&f->lock);
}

void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
{
	if (del_timer(&fq->timer))
		atomic_dec(&fq->refcnt);

	if (!(fq->last_in & INET_FRAG_COMPLETE)) {
		fq_unlink(fq, f);
		atomic_dec(&fq->refcnt);
		fq->last_in |= INET_FRAG_COMPLETE;
	}
}
EXPORT_SYMBOL(inet_frag_kill);
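
/*
 * Reference counting convention, as inet_frag_kill() above implies: the
 * hash table holds one reference and a pending timer holds another, so
 * killing a queue drops both. A timeout handler is expected to look
 * roughly like this sketch of ipv4's ip_expire() (simplified; the real
 * one also sends an ICMP time-exceeded message):
 *
 *	static void ip_expire(unsigned long arg)
 *	{
 *		struct ipq *qp = container_of((struct inet_frag_queue *)arg,
 *					      struct ipq, q);
 *
 *		spin_lock(&qp->q.lock);
 *		if (!(qp->q.last_in & INET_FRAG_COMPLETE))
 *			inet_frag_kill(&qp->q, &ip4_frags);
 *		spin_unlock(&qp->q.lock);
 *		inet_frag_put(&qp->q, &ip4_frags);
 *	}
 */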

static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
		struct sk_buff *skb)
{
	if (f->skb_free)
		f->skb_free(skb);
	kfree_skb(skb);
}

void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
			int *work)
{
	struct sk_buff *fp;
	struct netns_frags *nf;
	unsigned int sum, sum_truesize = 0;

	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
	fp = q->fragments;
	nf = q->net;
	while (fp) {
		struct sk_buff *xp = fp->next;

		sum_truesize += fp->truesize;
		frag_kfree_skb(nf, f, fp);
		fp = xp;
	}
	sum = sum_truesize + f->qsize;
	if (work)
		*work -= sum;
	sub_frag_mem_limit(q, sum);

	if (f->destructor)
		f->destructor(q);
	kfree(q);
}
EXPORT_SYMBOL(inet_frag_destroy);

int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
{
	struct inet_frag_queue *q;
	int work, evicted = 0;

	if (!force) {
		if (frag_mem_limit(nf) <= nf->high_thresh)
			return 0;
	}

	work = frag_mem_limit(nf) - nf->low_thresh;
	while (work > 0) {
		read_lock(&f->lock);
		if (list_empty(&nf->lru_list)) {
			read_unlock(&f->lock);
			break;
		}

		q = list_first_entry(&nf->lru_list,
				struct inet_frag_queue, lru_list);
		atomic_inc(&q->refcnt);
		read_unlock(&f->lock);

		spin_lock(&q->lock);
		if (!(q->last_in & INET_FRAG_COMPLETE))
			inet_frag_kill(q, f);
		spin_unlock(&q->lock);

		if (atomic_dec_and_test(&q->refcnt))
			inet_frag_destroy(q, f, &work);
		evicted++;
	}

	return evicted;
}
EXPORT_SYMBOL(inet_frag_evictor);
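
/*
 * Eviction walks the per-namespace LRU list until enough memory has
 * been reclaimed: "work" starts at frag_mem_limit(nf) - nf->low_thresh,
 * and every destroyed queue subtracts its accounted truesize from it,
 * so usage drops back to roughly low_thresh. For example, with ipv4's
 * traditional defaults of high_thresh = 256 kB and low_thresh = 192 kB,
 * a non-forced run triggers only above 256 kB and reclaims about 64 kB.
 * A caller typically drives it like this sketch of ipv4's ip_evictor()
 * (simplified; the real one also bumps REASMFAILS statistics):
 *
 *	static void ip_evictor(struct net *net)
 *	{
 *		inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
 *	}
 */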

static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
		struct inet_frag_queue *qp_in, struct inet_frags *f,
		void *arg)
{
	struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
	struct hlist_node *n;
#endif
	unsigned int hash;

	write_lock(&f->lock);
	/*
	 * While we stayed without the lock, another CPU could have updated
	 * the rnd seed, so we need to re-calculate the hash chain.
	 * Fortunately, qp_in can be used to get one.
	 */
	hash = f->hashfn(qp_in);
#ifdef CONFIG_SMP
	/* On SMP we have to recheck the hash table, because such an entry
	 * could have been created on another cpu while we promoted the
	 * read lock to a write lock.
	 */
	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
		if (qp->net == nf && f->match(qp, arg)) {
			atomic_inc(&qp->refcnt);
			write_unlock(&f->lock);
			qp_in->last_in |= INET_FRAG_COMPLETE;
			inet_frag_put(qp_in, f);
			return qp;
		}
	}
#endif
	qp = qp_in;
	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
		atomic_inc(&qp->refcnt);

	atomic_inc(&qp->refcnt);
	hlist_add_head(&qp->list, &f->hash[hash]);
	list_add_tail(&qp->lru_list, &nf->lru_list);
	nf->nqueues++;
	write_unlock(&f->lock);
	return qp;
}

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *q;

	q = kzalloc(f->qsize, GFP_ATOMIC);
	if (q == NULL)
		return NULL;

	q->net = nf;
	f->constructor(q, arg);
	add_frag_mem_limit(q, f->qsize);

	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
	spin_lock_init(&q->lock);
	atomic_set(&q->refcnt, 1);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *q;

	q = inet_frag_alloc(nf, f, arg);
	if (q == NULL)
		return NULL;

	return inet_frag_intern(nf, q, f, arg);
}
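
/*
 * The opaque void *arg threaded through create/intern/find is a
 * protocol-specific lookup key, consumed by ->constructor and ->match.
 * Sketch of ipv4's pair (ip4_create_arg and struct ipq are defined in
 * ip_fragment.c and shown here only for illustration):
 *
 *	struct ip4_create_arg {
 *		struct iphdr *iph;
 *		u32 user;
 *	};
 *
 *	static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
 *	{
 *		struct ipq *qp = container_of(q, struct ipq, q);
 *		struct ip4_create_arg *arg = a;
 *
 *		return	qp->id == arg->iph->id &&
 *			qp->saddr == arg->iph->saddr &&
 *			qp->daddr == arg->iph->daddr &&
 *			qp->protocol == arg->iph->protocol &&
 *			qp->user == arg->user;
 *	}
 */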

struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
		struct inet_frags *f, void *key, unsigned int hash)
	__releases(&f->lock)
{
	struct inet_frag_queue *q;
	struct hlist_node *n;

	hlist_for_each_entry(q, n, &f->hash[hash], list) {
		if (q->net == nf && f->match(q, key)) {
			atomic_inc(&q->refcnt);
			read_unlock(&f->lock);
			return q;
		}
	}
	read_unlock(&f->lock);

	return inet_frag_create(nf, f, key);
}
EXPORT_SYMBOL(inet_frag_find);
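
/*
 * Locking contract, per the __releases() annotation above: the caller
 * takes f->lock for reading, computes the hash under that lock (the rnd
 * seed may change otherwise), and inet_frag_find() drops the read lock
 * on every path before returning. Usage sketch based on ipv4's
 * ip_find():
 *
 *	read_lock(&ip4_frags.lock);
 *	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 *
 *	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
 *	if (q == NULL)
 *		goto out_nomem;
 *
 *	return container_of(q, struct ipq, q);
 */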