/*
 * inet fragments management
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly.c and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/rhashtable.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

/* Given the OR of the IPFRAG_ECN_* values of all fragments, apply the
 * RFC 3168 5.3 requirements.
 * Value: 0xff if the frame should be dropped,
 *        0 or INET_ECN_CE, to be ORed into the final iph->tos field.
 */
const u8 ip_frag_ecn_table[16] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);
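/* Usage sketch (illustrative only, not compiled here): a reassembly
 * path ORs one IPFRAG_ECN_* bit into its queue state for every
 * fragment received, then consults the table once at reassembly time.
 * The "ecn_bits" field name below is hypothetical:
 *
 *	u8 ecn = ip_frag_ecn_table[q->ecn_bits];
 *
 *	if (ecn == 0xff)
 *		return -EINVAL;	(invalid combination: drop the frame)
 *	iph->tos |= ecn;	(0 or INET_ECN_CE)
 */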
int inet_frags_init(struct inet_frags *f)
{
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_fini(struct inet_frags *f)
{
	/* We must wait until all pending inet_frag_destroy_rcu() callbacks
	 * have completed before the cache can go away.
	 */
	rcu_barrier();

	kmem_cache_destroy(f->frags_cachep);
	f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

static void inet_frags_free_cb(void *ptr, void *arg)
{
	struct inet_frag_queue *fq = ptr;

	/* If we cannot cancel the timer, this frag_queue is already
	 * disappearing, so there is nothing to do.
	 * Otherwise, we own a refcount until the end of this function.
	 */
	if (!del_timer(&fq->timer))
		return;

	spin_lock_bh(&fq->lock);
	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq->flags |= INET_FRAG_COMPLETE;
		refcount_dec(&fq->refcnt);
	}
	spin_unlock_bh(&fq->lock);

	inet_frag_put(fq);
}

void inet_frags_exit_net(struct netns_frags *nf)
{
	nf->low_thresh = 0; /* prevent creation of new frags */

	rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
EXPORT_SYMBOL(inet_frags_exit_net);

void inet_frag_kill(struct inet_frag_queue *fq)
{
	if (del_timer(&fq->timer))
		refcount_dec(&fq->refcnt);

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		struct netns_frags *nf = fq->net;

		fq->flags |= INET_FRAG_COMPLETE;
		rhashtable_remove_fast(&nf->rhashtable, &fq->node,
				       nf->f->rhash_params);
		refcount_dec(&fq->refcnt);
	}
}
EXPORT_SYMBOL(inet_frag_kill);

static void inet_frag_destroy_rcu(struct rcu_head *head)
{
	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
						 rcu);
	struct inet_frags *f = q->net->f;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);
}

void inet_frag_destroy(struct inet_frag_queue *q)
{
	struct sk_buff *fp;
	struct netns_frags *nf;
	unsigned int sum, sum_truesize = 0;
	struct inet_frags *f;

	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
	fp = q->fragments;
	nf = q->net;
	f = nf->f;
	while (fp) {
		struct sk_buff *xp = fp->next;

		sum_truesize += fp->truesize;
		kfree_skb(fp);
		fp = xp;
	}
	sum = sum_truesize + f->qsize;

	call_rcu(&q->rcu, inet_frag_destroy_rcu);

	sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
					       struct inet_frags *f,
					       void *arg)
{
	struct inet_frag_queue *q;

	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
		return NULL;

	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
	if (!q)
		return NULL;

	q->net = nf;
	f->constructor(q, arg);
	add_frag_mem_limit(nf, f->qsize);

	timer_setup(&q->timer, f->frag_expire, 0);
	spin_lock_init(&q->lock);
	/* One reference for the timer, one for the hash table, and one
	 * returned to the caller.
	 */
	refcount_set(&q->refcnt, 3);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
						void *arg)
{
	struct inet_frags *f = nf->f;
	struct inet_frag_queue *q;
	int err;

	q = inet_frag_alloc(nf, f, arg);
	if (!q)
		return NULL;

	mod_timer(&q->timer, jiffies + nf->timeout);

	err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
				     f->rhash_params);
	if (err < 0) {
		q->flags |= INET_FRAG_COMPLETE;
		inet_frag_kill(q);
		inet_frag_destroy(q);
		return NULL;
	}
	return q;
}

/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
	struct inet_frag_queue *fq;

	rcu_read_lock();

	fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
	if (fq) {
		if (!refcount_inc_not_zero(&fq->refcnt))
			fq = NULL;
		rcu_read_unlock();
		return fq;
	}
	rcu_read_unlock();

	return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
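/* Consumer lifecycle sketch (illustrative only, not compiled here);
 * struct my_queue, my_frags, my_expire and the other my_* names are
 * hypothetical.  A protocol embeds struct inet_frag_queue at the start
 * of its own queue type, fills in the callbacks, and registers once:
 *
 *	my_frags.qsize            = sizeof(struct my_queue);
 *	my_frags.constructor      = my_queue_init;
 *	my_frags.destructor       = my_queue_free;
 *	my_frags.frag_expire      = my_expire;
 *	my_frags.frags_cache_name = "my-frags";
 *	my_frags.rhash_params     = my_rhash_params;
 *	err = inet_frags_init(&my_frags);
 *
 * Per packet, it looks up (or creates) a queue and drops the reference
 * returned by inet_frag_find() when done:
 *
 *	q = inet_frag_find(&net->my.frags, &key);
 *	if (q) {
 *		(add the fragment under q->lock)
 *		inet_frag_put(q);
 *	}
 *
 * The timer owns one reference, so a typical frag_expire handler kills
 * the queue and then puts that reference:
 *
 *	fq = from_timer(fq, t, timer);
 *	(...)
 *	inet_frag_kill(fq);
 *	(...)
 *	inet_frag_put(fq);
 *
 * inet_frag_put() is the refcounted pairing of inet_frag_find(); it is
 * declared in <net/inet_frag.h>.  On netns teardown the protocol calls
 * inet_frags_exit_net(), and on unload inet_frags_fini().
 */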