1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * inet fragments management 4 * 5 * Authors: Pavel Emelyanov <xemul@openvz.org> 6 * Started as consolidation of ipv4/ip_fragment.c, 7 * ipv6/reassembly. and ipv6 nf conntrack reassembly 8 */ 9 10 #include <linux/list.h> 11 #include <linux/spinlock.h> 12 #include <linux/module.h> 13 #include <linux/timer.h> 14 #include <linux/mm.h> 15 #include <linux/random.h> 16 #include <linux/skbuff.h> 17 #include <linux/rtnetlink.h> 18 #include <linux/slab.h> 19 #include <linux/rhashtable.h> 20 21 #include <net/sock.h> 22 #include <net/inet_frag.h> 23 #include <net/inet_ecn.h> 24 #include <net/ip.h> 25 #include <net/ipv6.h> 26 27 /* Use skb->cb to track consecutive/adjacent fragments coming at 28 * the end of the queue. Nodes in the rb-tree queue will 29 * contain "runs" of one or more adjacent fragments. 30 * 31 * Invariants: 32 * - next_frag is NULL at the tail of a "run"; 33 * - the head of a "run" has the sum of all fragment lengths in frag_run_len. 34 */ 35 struct ipfrag_skb_cb { 36 union { 37 struct inet_skb_parm h4; 38 struct inet6_skb_parm h6; 39 }; 40 struct sk_buff *next_frag; 41 int frag_run_len; 42 int ip_defrag_offset; 43 }; 44 45 #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) 46 47 static void fragcb_clear(struct sk_buff *skb) 48 { 49 RB_CLEAR_NODE(&skb->rbnode); 50 FRAG_CB(skb)->next_frag = NULL; 51 FRAG_CB(skb)->frag_run_len = skb->len; 52 } 53 54 /* Append skb to the last "run". */ 55 static void fragrun_append_to_last(struct inet_frag_queue *q, 56 struct sk_buff *skb) 57 { 58 fragcb_clear(skb); 59 60 FRAG_CB(q->last_run_head)->frag_run_len += skb->len; 61 FRAG_CB(q->fragments_tail)->next_frag = skb; 62 q->fragments_tail = skb; 63 } 64 65 /* Create a new "run" with the skb. */ 66 static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) 67 { 68 BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); 69 fragcb_clear(skb); 70 71 if (q->last_run_head) 72 rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, 73 &q->last_run_head->rbnode.rb_right); 74 else 75 rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); 76 rb_insert_color(&skb->rbnode, &q->rb_fragments); 77 78 q->fragments_tail = skb; 79 q->last_run_head = skb; 80 } 81 82 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements 83 * Value : 0xff if frame should be dropped. 84 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field 85 */ 86 const u8 ip_frag_ecn_table[16] = { 87 /* at least one fragment had CE, and others ECT_0 or ECT_1 */ 88 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, 89 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 90 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 91 92 /* invalid combinations : drop frame */ 93 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, 94 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, 95 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, 96 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 97 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, 98 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, 99 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 100 }; 101 EXPORT_SYMBOL(ip_frag_ecn_table); 102 103 int inet_frags_init(struct inet_frags *f) 104 { 105 f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, 106 NULL); 107 if (!f->frags_cachep) 108 return -ENOMEM; 109 110 refcount_set(&f->refcnt, 1); 111 init_completion(&f->completion); 112 return 0; 113 } 114 EXPORT_SYMBOL(inet_frags_init); 115 116 void inet_frags_fini(struct inet_frags *f) 117 { 118 if (refcount_dec_and_test(&f->refcnt)) 119 complete(&f->completion); 120 121 wait_for_completion(&f->completion); 122 123 kmem_cache_destroy(f->frags_cachep); 124 f->frags_cachep = NULL; 125 } 126 EXPORT_SYMBOL(inet_frags_fini); 127 128 /* called from rhashtable_free_and_destroy() at netns_frags dismantle */ 129 static void inet_frags_free_cb(void *ptr, void *arg) 130 { 131 struct inet_frag_queue *fq = ptr; 132 int count; 133 134 count = timer_delete_sync(&fq->timer) ? 1 : 0; 135 136 spin_lock_bh(&fq->lock); 137 fq->flags |= INET_FRAG_DROP; 138 if (!(fq->flags & INET_FRAG_COMPLETE)) { 139 fq->flags |= INET_FRAG_COMPLETE; 140 count++; 141 } else if (fq->flags & INET_FRAG_HASH_DEAD) { 142 count++; 143 } 144 spin_unlock_bh(&fq->lock); 145 146 inet_frag_putn(fq, count); 147 } 148 149 static LLIST_HEAD(fqdir_free_list); 150 151 static void fqdir_free_fn(struct work_struct *work) 152 { 153 struct llist_node *kill_list; 154 struct fqdir *fqdir, *tmp; 155 struct inet_frags *f; 156 157 /* Atomically snapshot the list of fqdirs to free */ 158 kill_list = llist_del_all(&fqdir_free_list); 159 160 /* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu) 161 * have completed, since they need to dereference fqdir. 162 * Would it not be nice to have kfree_rcu_barrier() ? :) 163 */ 164 rcu_barrier(); 165 166 llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) { 167 f = fqdir->f; 168 if (refcount_dec_and_test(&f->refcnt)) 169 complete(&f->completion); 170 171 kfree(fqdir); 172 } 173 } 174 175 static DECLARE_DELAYED_WORK(fqdir_free_work, fqdir_free_fn); 176 177 static void fqdir_work_fn(struct work_struct *work) 178 { 179 struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work); 180 181 rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); 182 183 if (llist_add(&fqdir->free_list, &fqdir_free_list)) 184 queue_delayed_work(system_percpu_wq, &fqdir_free_work, HZ); 185 } 186 187 int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net) 188 { 189 struct fqdir *fqdir = kzalloc_obj(*fqdir); 190 int res; 191 192 if (!fqdir) 193 return -ENOMEM; 194 fqdir->f = f; 195 fqdir->net = net; 196 res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params); 197 if (res < 0) { 198 kfree(fqdir); 199 return res; 200 } 201 refcount_inc(&f->refcnt); 202 *fqdirp = fqdir; 203 return 0; 204 } 205 EXPORT_SYMBOL(fqdir_init); 206 207 static struct workqueue_struct *inet_frag_wq; 208 209 static int __init inet_frag_wq_init(void) 210 { 211 inet_frag_wq = create_workqueue("inet_frag_wq"); 212 if (!inet_frag_wq) 213 panic("Could not create inet frag workq"); 214 return 0; 215 } 216 217 pure_initcall(inet_frag_wq_init); 218 219 void fqdir_pre_exit(struct fqdir *fqdir) 220 { 221 struct inet_frag_queue *fq; 222 struct rhashtable_iter hti; 223 224 /* Prevent creation of new frags. 225 * Pairs with READ_ONCE() in inet_frag_find(). 226 */ 227 WRITE_ONCE(fqdir->high_thresh, 0); 228 229 /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() 230 * and ip6frag_expire_frag_queue(). 231 */ 232 WRITE_ONCE(fqdir->dead, true); 233 234 rhashtable_walk_enter(&fqdir->rhashtable, &hti); 235 rhashtable_walk_start(&hti); 236 237 while ((fq = rhashtable_walk_next(&hti))) { 238 if (IS_ERR(fq)) { 239 if (PTR_ERR(fq) != -EAGAIN) 240 break; 241 continue; 242 } 243 spin_lock_bh(&fq->lock); 244 if (!(fq->flags & INET_FRAG_COMPLETE)) 245 inet_frag_queue_flush(fq, 0); 246 spin_unlock_bh(&fq->lock); 247 } 248 249 rhashtable_walk_stop(&hti); 250 rhashtable_walk_exit(&hti); 251 } 252 EXPORT_SYMBOL(fqdir_pre_exit); 253 254 void fqdir_exit(struct fqdir *fqdir) 255 { 256 INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); 257 queue_work(inet_frag_wq, &fqdir->destroy_work); 258 } 259 EXPORT_SYMBOL(fqdir_exit); 260 261 void inet_frag_kill(struct inet_frag_queue *fq, int *refs) 262 { 263 if (timer_delete(&fq->timer)) 264 (*refs)++; 265 266 if (!(fq->flags & INET_FRAG_COMPLETE)) { 267 struct fqdir *fqdir = fq->fqdir; 268 269 fq->flags |= INET_FRAG_COMPLETE; 270 rcu_read_lock(); 271 /* The RCU read lock provides a memory barrier 272 * guaranteeing that if fqdir->dead is false then 273 * the hash table destruction will not start until 274 * after we unlock. Paired with fqdir_pre_exit(). 275 */ 276 if (!READ_ONCE(fqdir->dead)) { 277 rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, 278 fqdir->f->rhash_params); 279 (*refs)++; 280 } else { 281 fq->flags |= INET_FRAG_HASH_DEAD; 282 } 283 rcu_read_unlock(); 284 } 285 } 286 EXPORT_SYMBOL(inet_frag_kill); 287 288 static void inet_frag_destroy_rcu(struct rcu_head *head) 289 { 290 struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, 291 rcu); 292 struct inet_frags *f = q->fqdir->f; 293 294 if (f->destructor) 295 f->destructor(q); 296 kmem_cache_free(f->frags_cachep, q); 297 } 298 299 static unsigned int 300 inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason) 301 { 302 struct rb_node *p = rb_first(root); 303 unsigned int sum = 0; 304 305 while (p) { 306 struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); 307 308 p = rb_next(p); 309 rb_erase(&skb->rbnode, root); 310 while (skb) { 311 struct sk_buff *next = FRAG_CB(skb)->next_frag; 312 313 sum += skb->truesize; 314 kfree_skb_reason(skb, reason); 315 skb = next; 316 } 317 } 318 return sum; 319 } 320 321 void inet_frag_queue_flush(struct inet_frag_queue *q, 322 enum skb_drop_reason reason) 323 { 324 unsigned int sum; 325 326 reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT; 327 sum = inet_frag_rbtree_purge(&q->rb_fragments, reason); 328 sub_frag_mem_limit(q->fqdir, sum); 329 q->rb_fragments = RB_ROOT; 330 q->fragments_tail = NULL; 331 q->last_run_head = NULL; 332 } 333 EXPORT_SYMBOL(inet_frag_queue_flush); 334 335 void inet_frag_destroy(struct inet_frag_queue *q) 336 { 337 unsigned int sum, sum_truesize = 0; 338 enum skb_drop_reason reason; 339 struct inet_frags *f; 340 struct fqdir *fqdir; 341 342 WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); 343 reason = (q->flags & INET_FRAG_DROP) ? 344 SKB_DROP_REASON_FRAG_REASM_TIMEOUT : 345 SKB_CONSUMED; 346 WARN_ON(timer_delete(&q->timer) != 0); 347 348 /* Release all fragment data. */ 349 fqdir = q->fqdir; 350 f = fqdir->f; 351 sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason); 352 sum = sum_truesize + f->qsize; 353 354 call_rcu(&q->rcu, inet_frag_destroy_rcu); 355 356 sub_frag_mem_limit(fqdir, sum); 357 } 358 EXPORT_SYMBOL(inet_frag_destroy); 359 360 static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, 361 struct inet_frags *f, 362 void *arg) 363 { 364 struct inet_frag_queue *q; 365 366 q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); 367 if (!q) 368 return NULL; 369 370 q->fqdir = fqdir; 371 f->constructor(q, arg); 372 add_frag_mem_limit(fqdir, f->qsize); 373 374 timer_setup(&q->timer, f->frag_expire, 0); 375 spin_lock_init(&q->lock); 376 /* One reference for the timer, one for the hash table. 377 * We never take any extra references, only decrement this field. 378 */ 379 refcount_set(&q->refcnt, 2); 380 381 return q; 382 } 383 384 static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, 385 void *arg, 386 struct inet_frag_queue **prev) 387 { 388 struct inet_frags *f = fqdir->f; 389 struct inet_frag_queue *q; 390 391 q = inet_frag_alloc(fqdir, f, arg); 392 if (!q) { 393 *prev = ERR_PTR(-ENOMEM); 394 return NULL; 395 } 396 mod_timer(&q->timer, jiffies + fqdir->timeout); 397 398 *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, 399 &q->node, f->rhash_params); 400 if (*prev) { 401 /* We could not insert in the hash table, 402 * we need to cancel what inet_frag_alloc() 403 * anticipated. 404 */ 405 int refs = 1; 406 407 q->flags |= INET_FRAG_COMPLETE; 408 inet_frag_kill(q, &refs); 409 inet_frag_putn(q, refs); 410 return NULL; 411 } 412 return q; 413 } 414 415 struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) 416 { 417 /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ 418 long high_thresh = READ_ONCE(fqdir->high_thresh); 419 struct inet_frag_queue *fq = NULL, *prev; 420 421 if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) 422 return NULL; 423 424 prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); 425 if (!prev) 426 fq = inet_frag_create(fqdir, key, &prev); 427 if (!IS_ERR_OR_NULL(prev)) 428 fq = prev; 429 return fq; 430 } 431 EXPORT_SYMBOL(inet_frag_find); 432 433 int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, 434 int offset, int end) 435 { 436 struct sk_buff *last = q->fragments_tail; 437 438 /* RFC5722, Section 4, amended by Errata ID : 3089 439 * When reassembling an IPv6 datagram, if 440 * one or more its constituent fragments is determined to be an 441 * overlapping fragment, the entire datagram (and any constituent 442 * fragments) MUST be silently discarded. 443 * 444 * Duplicates, however, should be ignored (i.e. skb dropped, but the 445 * queue/fragments kept for later reassembly). 446 */ 447 if (!last) 448 fragrun_create(q, skb); /* First fragment. */ 449 else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { 450 /* This is the common case: skb goes to the end. */ 451 /* Detect and discard overlaps. */ 452 if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) 453 return IPFRAG_OVERLAP; 454 if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) 455 fragrun_append_to_last(q, skb); 456 else 457 fragrun_create(q, skb); 458 } else { 459 /* Binary search. Note that skb can become the first fragment, 460 * but not the last (covered above). 461 */ 462 struct rb_node **rbn, *parent; 463 464 rbn = &q->rb_fragments.rb_node; 465 do { 466 struct sk_buff *curr; 467 int curr_run_end; 468 469 parent = *rbn; 470 curr = rb_to_skb(parent); 471 curr_run_end = FRAG_CB(curr)->ip_defrag_offset + 472 FRAG_CB(curr)->frag_run_len; 473 if (end <= FRAG_CB(curr)->ip_defrag_offset) 474 rbn = &parent->rb_left; 475 else if (offset >= curr_run_end) 476 rbn = &parent->rb_right; 477 else if (offset >= FRAG_CB(curr)->ip_defrag_offset && 478 end <= curr_run_end) 479 return IPFRAG_DUP; 480 else 481 return IPFRAG_OVERLAP; 482 } while (*rbn); 483 /* Here we have parent properly set, and rbn pointing to 484 * one of its NULL left/right children. Insert skb. 485 */ 486 fragcb_clear(skb); 487 rb_link_node(&skb->rbnode, parent, rbn); 488 rb_insert_color(&skb->rbnode, &q->rb_fragments); 489 } 490 491 FRAG_CB(skb)->ip_defrag_offset = offset; 492 if (offset) 493 nf_reset_ct(skb); 494 495 return IPFRAG_OK; 496 } 497 EXPORT_SYMBOL(inet_frag_queue_insert); 498 499 void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, 500 struct sk_buff *parent) 501 { 502 struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); 503 void (*destructor)(struct sk_buff *); 504 unsigned int orig_truesize = 0; 505 struct sk_buff **nextp = NULL; 506 struct sock *sk = skb->sk; 507 int delta; 508 509 if (sk && is_skb_wmem(skb)) { 510 /* TX: skb->sk might have been passed as argument to 511 * dst->output and must remain valid until tx completes. 512 * 513 * Move sk to reassembled skb and fix up wmem accounting. 514 */ 515 orig_truesize = skb->truesize; 516 destructor = skb->destructor; 517 } 518 519 if (head != skb) { 520 fp = skb_clone(skb, GFP_ATOMIC); 521 if (!fp) { 522 head = skb; 523 goto out_restore_sk; 524 } 525 if (RB_EMPTY_NODE(&skb->rbnode)) 526 FRAG_CB(parent)->next_frag = fp; 527 else 528 rb_replace_node(&skb->rbnode, &fp->rbnode, 529 &q->rb_fragments); 530 if (q->fragments_tail == skb) 531 q->fragments_tail = fp; 532 533 if (orig_truesize) { 534 /* prevent skb_morph from releasing sk */ 535 skb->sk = NULL; 536 skb->destructor = NULL; 537 } 538 skb_morph(skb, head); 539 rb_replace_node(&head->rbnode, &skb->rbnode, 540 &q->rb_fragments); 541 consume_skb(head); 542 head = skb; 543 } 544 WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); 545 546 delta = -head->truesize; 547 548 /* Head of list must not be cloned. */ 549 if (skb_unclone(head, GFP_ATOMIC)) 550 goto out_restore_sk; 551 552 delta += head->truesize; 553 if (delta) 554 add_frag_mem_limit(q->fqdir, delta); 555 556 /* If the first fragment is fragmented itself, we split 557 * it to two chunks: the first with data and paged part 558 * and the second, holding only fragments. 559 */ 560 if (skb_has_frag_list(head)) { 561 struct sk_buff *clone; 562 int i, plen = 0; 563 564 clone = alloc_skb(0, GFP_ATOMIC); 565 if (!clone) 566 goto out_restore_sk; 567 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; 568 skb_frag_list_init(head); 569 for (i = 0; i < skb_shinfo(head)->nr_frags; i++) 570 plen += skb_frag_size(&skb_shinfo(head)->frags[i]); 571 clone->data_len = head->data_len - plen; 572 clone->len = clone->data_len; 573 head->truesize += clone->truesize; 574 clone->csum = 0; 575 clone->ip_summed = head->ip_summed; 576 add_frag_mem_limit(q->fqdir, clone->truesize); 577 skb_shinfo(head)->frag_list = clone; 578 nextp = &clone->next; 579 } else { 580 nextp = &skb_shinfo(head)->frag_list; 581 } 582 583 out_restore_sk: 584 if (orig_truesize) { 585 int ts_delta = head->truesize - orig_truesize; 586 587 /* if this reassembled skb is fragmented later, 588 * fraglist skbs will get skb->sk assigned from head->sk, 589 * and each frag skb will be released via sock_wfree. 590 * 591 * Update sk_wmem_alloc. 592 */ 593 head->sk = sk; 594 head->destructor = destructor; 595 refcount_add(ts_delta, &sk->sk_wmem_alloc); 596 } 597 598 return nextp; 599 } 600 EXPORT_SYMBOL(inet_frag_reasm_prepare); 601 602 void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, 603 void *reasm_data, bool try_coalesce) 604 { 605 struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; 606 const unsigned int head_truesize = head->truesize; 607 struct sk_buff **nextp = reasm_data; 608 struct rb_node *rbn; 609 struct sk_buff *fp; 610 int sum_truesize; 611 612 skb_push(head, head->data - skb_network_header(head)); 613 614 /* Traverse the tree in order, to build frag_list. */ 615 fp = FRAG_CB(head)->next_frag; 616 rbn = rb_next(&head->rbnode); 617 rb_erase(&head->rbnode, &q->rb_fragments); 618 619 sum_truesize = head->truesize; 620 while (rbn || fp) { 621 /* fp points to the next sk_buff in the current run; 622 * rbn points to the next run. 623 */ 624 /* Go through the current run. */ 625 while (fp) { 626 struct sk_buff *next_frag = FRAG_CB(fp)->next_frag; 627 bool stolen; 628 int delta; 629 630 sum_truesize += fp->truesize; 631 if (head->ip_summed != fp->ip_summed) 632 head->ip_summed = CHECKSUM_NONE; 633 else if (head->ip_summed == CHECKSUM_COMPLETE) 634 head->csum = csum_add(head->csum, fp->csum); 635 636 if (try_coalesce && skb_try_coalesce(head, fp, &stolen, 637 &delta)) { 638 kfree_skb_partial(fp, stolen); 639 } else { 640 fp->prev = NULL; 641 memset(&fp->rbnode, 0, sizeof(fp->rbnode)); 642 fp->sk = NULL; 643 644 head->data_len += fp->len; 645 head->len += fp->len; 646 head->truesize += fp->truesize; 647 648 *nextp = fp; 649 nextp = &fp->next; 650 } 651 652 fp = next_frag; 653 } 654 /* Move to the next run. */ 655 if (rbn) { 656 struct rb_node *rbnext = rb_next(rbn); 657 658 fp = rb_to_skb(rbn); 659 rb_erase(rbn, &q->rb_fragments); 660 rbn = rbnext; 661 } 662 } 663 sub_frag_mem_limit(q->fqdir, sum_truesize); 664 665 *nextp = NULL; 666 skb_mark_not_on_list(head); 667 head->prev = NULL; 668 head->tstamp = q->stamp; 669 head->tstamp_type = q->tstamp_type; 670 671 if (sk) 672 refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); 673 } 674 EXPORT_SYMBOL(inet_frag_reasm_finish); 675 676 struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) 677 { 678 struct sk_buff *head, *skb; 679 680 head = skb_rb_first(&q->rb_fragments); 681 if (!head) 682 return NULL; 683 skb = FRAG_CB(head)->next_frag; 684 if (skb) 685 rb_replace_node(&head->rbnode, &skb->rbnode, 686 &q->rb_fragments); 687 else 688 rb_erase(&head->rbnode, &q->rb_fragments); 689 memset(&head->rbnode, 0, sizeof(head->rbnode)); 690 barrier(); 691 692 if (head == q->fragments_tail) 693 q->fragments_tail = NULL; 694 695 sub_frag_mem_limit(q->fqdir, head->truesize); 696 697 return head; 698 } 699 EXPORT_SYMBOL(inet_frag_pull_head); 700