/*
 * inet fragments management
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
 * Value : 0xff if frame should be dropped.
 *         0 or INET_ECN_CE value, to be ORed into the final iph->tos field
 */
const u8 ip_frag_ecn_table[16] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

static void inet_frag_secret_rebuild(unsigned long dummy)
{
	struct inet_frags *f = (struct inet_frags *)dummy;
	unsigned long now = jiffies;
	int i;

	write_lock(&f->lock);
	get_random_bytes(&f->rnd, sizeof(u32));
	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
		struct inet_frag_queue *q;
		struct hlist_node *n;

		hlist_for_each_entry_safe(q, n, &f->hash[i], list) {
			unsigned int hval = f->hashfn(q);

			if (hval != i) {
				hlist_del(&q->list);

				/* Relink to new hash chain. */
				hlist_add_head(&q->list, &f->hash[hval]);
			}
		}
	}
	write_unlock(&f->lock);

	mod_timer(&f->secret_timer, now + f->secret_interval);
}
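/*
 * Illustrative note: the secret rebuild above only moves queues between
 * buckets if each protocol's ->hashfn mixes f->rnd into the hash.  A
 * minimal sketch of such a hash function, assuming a hypothetical
 * "example" protocol (example_frags, example_frag_queue and its
 * id/saddr/daddr fields are assumptions, not the real IPv4/IPv6 code):
 *
 *	static unsigned int example_hashfn(struct inet_frag_queue *q)
 *	{
 *		const struct example_frag_queue *eq;
 *
 *		eq = container_of(q, struct example_frag_queue, q);
 *		return jhash_3words(eq->id, eq->saddr, eq->daddr,
 *				    example_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 *	}
 */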

void inet_frags_init(struct inet_frags *f)
{
	int i;

	for (i = 0; i < INETFRAGS_HASHSZ; i++)
		INIT_HLIST_HEAD(&f->hash[i]);

	rwlock_init(&f->lock);

	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
			(jiffies ^ (jiffies >> 6)));

	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
			(unsigned long)f);
	f->secret_timer.expires = jiffies + f->secret_interval;
	add_timer(&f->secret_timer);
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_init_net(struct netns_frags *nf)
{
	nf->nqueues = 0;
	init_frag_mem_limit(nf);
	INIT_LIST_HEAD(&nf->lru_list);
	spin_lock_init(&nf->lru_lock);
}
EXPORT_SYMBOL(inet_frags_init_net);

void inet_frags_fini(struct inet_frags *f)
{
	del_timer(&f->secret_timer);
}
EXPORT_SYMBOL(inet_frags_fini);

void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
	nf->low_thresh = 0;

	local_bh_disable();
	inet_frag_evictor(nf, f, true);
	local_bh_enable();

	percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);

static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
	write_lock(&f->lock);
	hlist_del(&fq->list);
	write_unlock(&f->lock);
	inet_frag_lru_del(fq);
}

void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
{
	if (del_timer(&fq->timer))
		atomic_dec(&fq->refcnt);

	if (!(fq->last_in & INET_FRAG_COMPLETE)) {
		fq_unlink(fq, f);
		atomic_dec(&fq->refcnt);
		fq->last_in |= INET_FRAG_COMPLETE;
	}
}
EXPORT_SYMBOL(inet_frag_kill);

static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
				  struct sk_buff *skb)
{
	if (f->skb_free)
		f->skb_free(skb);
	kfree_skb(skb);
}
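/*
 * Reference counting, in short: a queue holds one reference for the hash
 * table and one for its pending timer; inet_frag_kill() above drops both.
 * A caller that already owns its own reference (for example one taken by
 * inet_frag_find()) typically finishes a queue roughly like this
 * (illustrative sketch; "example_frags" is an assumed protocol descriptor):
 *
 *	spin_lock(&fq->lock);
 *	inet_frag_kill(fq, &example_frags);
 *	spin_unlock(&fq->lock);
 *	inet_frag_put(fq, &example_frags);
 *
 * inet_frag_put() calls inet_frag_destroy() below once the last reference
 * is gone.
 */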
void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
		       int *work)
{
	struct sk_buff *fp;
	struct netns_frags *nf;
	unsigned int sum, sum_truesize = 0;

	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
	fp = q->fragments;
	nf = q->net;
	while (fp) {
		struct sk_buff *xp = fp->next;

		sum_truesize += fp->truesize;
		frag_kfree_skb(nf, f, fp);
		fp = xp;
	}
	sum = sum_truesize + f->qsize;
	if (work)
		*work -= sum;
	sub_frag_mem_limit(q, sum);

	if (f->destructor)
		f->destructor(q);
	kfree(q);
}
EXPORT_SYMBOL(inet_frag_destroy);

int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
{
	struct inet_frag_queue *q;
	int work, evicted = 0;

	if (!force) {
		if (frag_mem_limit(nf) <= nf->high_thresh)
			return 0;
	}

	work = frag_mem_limit(nf) - nf->low_thresh;
	while (work > 0) {
		spin_lock(&nf->lru_lock);

		if (list_empty(&nf->lru_list)) {
			spin_unlock(&nf->lru_lock);
			break;
		}

		q = list_first_entry(&nf->lru_list,
				struct inet_frag_queue, lru_list);
		atomic_inc(&q->refcnt);
		/* Remove q from list to avoid several CPUs grabbing it */
		list_del_init(&q->lru_list);

		spin_unlock(&nf->lru_lock);

		spin_lock(&q->lock);
		if (!(q->last_in & INET_FRAG_COMPLETE))
			inet_frag_kill(q, f);
		spin_unlock(&q->lock);

		if (atomic_dec_and_test(&q->refcnt))
			inet_frag_destroy(q, f, &work);
		evicted++;
	}

	return evicted;
}
EXPORT_SYMBOL(inet_frag_evictor);
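/*
 * Memory accounting sketch.  frag_mem_limit(), add_frag_mem_limit() and
 * sub_frag_mem_limit() wrap the per-netns percpu_counter nf->mem, so the
 * thresholds tested by the evictor above are approximate by design.  A
 * protocol's receive path typically kicks the evictor before queueing a
 * new fragment, roughly (illustrative sketch; example_frags is an assumed
 * protocol descriptor):
 *
 *	if (frag_mem_limit(nf) > nf->high_thresh)
 *		inet_frag_evictor(nf, &example_frags, false);
 */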
static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
		struct inet_frag_queue *qp_in, struct inet_frags *f,
		void *arg)
{
	struct inet_frag_queue *qp;
	unsigned int hash;

	write_lock(&f->lock);
	/*
	 * While we stayed w/o the lock other CPU could update
	 * the rnd seed, so we need to re-calculate the hash
	 * chain. Fortunately the qp_in can be used to get one.
	 */
	hash = f->hashfn(qp_in);
#ifdef CONFIG_SMP
	/* With SMP race we have to recheck hash table, because
	 * such entry could have been created on another cpu while we
	 * promoted read lock to write lock.
	 */
	hlist_for_each_entry(qp, &f->hash[hash], list) {
		if (qp->net == nf && f->match(qp, arg)) {
			atomic_inc(&qp->refcnt);
			write_unlock(&f->lock);
			qp_in->last_in |= INET_FRAG_COMPLETE;
			inet_frag_put(qp_in, f);
			return qp;
		}
	}
#endif
	qp = qp_in;
	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
		atomic_inc(&qp->refcnt);

	atomic_inc(&qp->refcnt);
	hlist_add_head(&qp->list, &f->hash[hash]);
	write_unlock(&f->lock);
	inet_frag_lru_add(nf, qp);
	return qp;
}

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *q;

	q = kzalloc(f->qsize, GFP_ATOMIC);
	if (q == NULL)
		return NULL;

	q->net = nf;
	f->constructor(q, arg);
	add_frag_mem_limit(q, f->qsize);

	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
	spin_lock_init(&q->lock);
	atomic_set(&q->refcnt, 1);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *q;

	q = inet_frag_alloc(nf, f, arg);
	if (q == NULL)
		return NULL;

	return inet_frag_intern(nf, q, f, arg);
}

struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
		struct inet_frags *f, void *key, unsigned int hash)
	__releases(&f->lock)
{
	struct inet_frag_queue *q;
	int depth = 0;

	hlist_for_each_entry(q, &f->hash[hash], list) {
		if (q->net == nf && f->match(q, key)) {
			atomic_inc(&q->refcnt);
			read_unlock(&f->lock);
			return q;
		}
		depth++;
	}
	read_unlock(&f->lock);

	if (depth <= INETFRAGS_MAXDEPTH)
		return inet_frag_create(nf, f, key);
	else
		return ERR_PTR(-ENOBUFS);
}
EXPORT_SYMBOL(inet_frag_find);

void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
				   const char *prefix)
{
	static const char msg[] = "inet_frag_find: Fragment hash bucket"
		" list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
		". Dropping fragment.\n";

	if (PTR_ERR(q) == -ENOBUFS)
		LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
}
EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
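/*
 * Lookup sketch.  inet_frag_find() above is entered with f->lock read-held
 * and releases it itself (hence the __releases() annotation); when a hash
 * chain exceeds INETFRAGS_MAXDEPTH it returns ERR_PTR(-ENOBUFS), which
 * callers report via inet_frag_maybe_warn_overflow().  A protocol lookup
 * helper might therefore look roughly like this (illustrative sketch; the
 * example_* names are assumptions, not part of this API):
 *
 *	struct example_frag_queue *example_find(struct net *net,
 *						struct example_key *key,
 *						unsigned int hash)
 *	{
 *		struct inet_frag_queue *q;
 *
 *		read_lock(&example_frags.lock);
 *		q = inet_frag_find(&net->example_ns_frags, &example_frags,
 *				   key, hash);
 *		if (IS_ERR_OR_NULL(q)) {
 *			inet_frag_maybe_warn_overflow(q, pr_fmt());
 *			return NULL;
 *		}
 *		return container_of(q, struct example_frag_queue, q);
 *	}
 */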