/*
 *      Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *      Authors:        Alan Cox <iiitac@pyr.swan.ac.uk>
 *                      Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *      Version:        $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 *      Fixes:
 *              Alan Cox        :       Fixed the worst of the load
 *                                      balancer bugs.
 *              Dave Platt      :       Interrupt stacking fix.
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       Changed buffer format.
 *              Alan Cox        :       destructor hook for AF_UNIX etc.
 *              Linus Torvalds  :       Better skb_clone.
 *              Alan Cox        :       Added skb_copy.
 *              Alan Cox        :       Added all the changed routines Linus
 *                                      only put in the headers
 *              Ray VanTassle   :       Fixed --skb->lock in free
 *              Alan Cox        :       skb_copy copy arp field
 *              Andi Kleen      :       slabified it.
 *              Robert Olsson   :       Removed skb_head_pool
 *
 *      NOTE:
 *              The __skb_ routines should be called with interrupts
 *      disabled, or you better be *real* sure that the operation is atomic
 *      with respect to whatever list is being frobbed (e.g. via lock_sock()
 *      or via disabling bottom half handlers, etc).
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *      The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>
#include <asm/system.h>

static kmem_cache_t *skbuff_head_cache __read_mostly;
static kmem_cache_t *skbuff_fclone_cache __read_mostly;

/*
 *      Keep out-of-line to prevent kernel bloat.
 *      __builtin_return_address is not used because it is not always
 *      reliable.
 */

/**
 * skb_over_panic - private function
 * @skb: buffer
 * @sz: size
 * @here: address
 *
 * Out of line support code for skb_put(). Not user callable.
 */
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
        printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
                          "data:%p tail:%p end:%p dev:%s\n",
               here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
               skb->dev ? skb->dev->name : "<NULL>");
        BUG();
}

/**
 * skb_under_panic - private function
 * @skb: buffer
 * @sz: size
 * @here: address
 *
 * Out of line support code for skb_push(). Not user callable.
 */

void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
        printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
                          "data:%p tail:%p end:%p dev:%s\n",
               here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
               skb->dev ? skb->dev->name : "<NULL>");
        BUG();
}

/*
 * Allocate a new skbuff. We do this ourselves so we can fill in a few
 * 'private' fields and also do memory statistics to find all the
 * [BEEP] leaks.
 *
 */

/**
 * __alloc_skb - allocate a network buffer
 * @size: size to allocate
 * @gfp_mask: allocation mask
 * @fclone: allocate from the fclone cache instead of the head cache
 *
 * Allocate a new &sk_buff. The returned buffer has no headroom and a
 * tail room of @size bytes. The object has a reference count of one.
 * The return is the buffer. On a failure the return is %NULL.
 *
 * Buffers may only be allocated from interrupts using a @gfp_mask of
 * %GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                            int fclone)
{
        struct sk_buff *skb;
        u8 *data;

        /* Get the HEAD */
        if (fclone)
                skb = kmem_cache_alloc(skbuff_fclone_cache,
                                       gfp_mask & ~__GFP_DMA);
        else
                skb = kmem_cache_alloc(skbuff_head_cache,
                                       gfp_mask & ~__GFP_DMA);

        if (!skb)
                goto out;

        /* Get the DATA. Size must match skb_add_mtu(). */
        size = SKB_DATA_ALIGN(size);
        data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
        if (!data)
                goto nodata;

        memset(skb, 0, offsetof(struct sk_buff, truesize));
        skb->truesize = size + sizeof(struct sk_buff);
        atomic_set(&skb->users, 1);
        skb->head = data;
        skb->data = data;
        skb->tail = data;
        skb->end = data + size;
        if (fclone) {
                struct sk_buff *child = skb + 1;
                atomic_t *fclone_ref = (atomic_t *) (child + 1);

                skb->fclone = SKB_FCLONE_ORIG;
                atomic_set(fclone_ref, 1);

                child->fclone = SKB_FCLONE_UNAVAILABLE;
        }
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->tso_size = 0;
        skb_shinfo(skb)->tso_segs = 0;
        skb_shinfo(skb)->frag_list = NULL;
out:
        return skb;
nodata:
        /* Free the head back to the cache it actually came from. */
        kmem_cache_free(fclone ? skbuff_fclone_cache : skbuff_head_cache, skb);
        skb = NULL;
        goto out;
}
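
/*
 * Illustrative sketch (not part of the original file): the usual way a
 * caller pairs alloc_skb() with skb_reserve() and skb_put().  The 16-byte
 * headroom and the payload source are assumptions made for the example.
 */
#if 0
static struct sk_buff *example_alloc_skb(const void *payload, unsigned int len)
{
        struct sk_buff *skb;

        /* Ask for headroom plus payload; GFP_ATOMIC as this may run in softirq. */
        skb = alloc_skb(16 + len, GFP_ATOMIC);
        if (!skb)
                return NULL;

        skb_reserve(skb, 16);                           /* headroom for headers pushed later */
        memcpy(skb_put(skb, len), payload, len);        /* extend the tail and fill the data */
        return skb;
}
#endif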

/**
 * alloc_skb_from_cache - allocate a network buffer
 * @cp: kmem_cache from which to allocate the data area
 *      (object size must be big enough for @size bytes + skb overheads)
 * @size: size to allocate
 * @gfp_mask: allocation mask
 *
 * Allocate a new &sk_buff. The returned buffer has no headroom and
 * tail room of @size bytes. The object has a reference count of one.
 * The return is the buffer. On a failure the return is %NULL.
 *
 * Buffers may only be allocated from interrupts using a @gfp_mask of
 * %GFP_ATOMIC.
 */
struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                     unsigned int size,
                                     gfp_t gfp_mask)
{
        struct sk_buff *skb;
        u8 *data;

        /* Get the HEAD */
        skb = kmem_cache_alloc(skbuff_head_cache,
                               gfp_mask & ~__GFP_DMA);
        if (!skb)
                goto out;

        /* Get the DATA. */
        size = SKB_DATA_ALIGN(size);
        data = kmem_cache_alloc(cp, gfp_mask);
        if (!data)
                goto nodata;

        memset(skb, 0, offsetof(struct sk_buff, truesize));
        skb->truesize = size + sizeof(struct sk_buff);
        atomic_set(&skb->users, 1);
        skb->head = data;
        skb->data = data;
        skb->tail = data;
        skb->end = data + size;

        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->tso_size = 0;
        skb_shinfo(skb)->tso_segs = 0;
        skb_shinfo(skb)->frag_list = NULL;
out:
        return skb;
nodata:
        kmem_cache_free(skbuff_head_cache, skb);
        skb = NULL;
        goto out;
}


static void skb_drop_fraglist(struct sk_buff *skb)
{
        struct sk_buff *list = skb_shinfo(skb)->frag_list;

        skb_shinfo(skb)->frag_list = NULL;

        do {
                struct sk_buff *this = list;
                list = list->next;
                kfree_skb(this);
        } while (list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
        struct sk_buff *list;

        for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
                skb_get(list);
}

void skb_release_data(struct sk_buff *skb)
{
        if (!skb->cloned ||
            !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
                               &skb_shinfo(skb)->dataref)) {
                if (skb_shinfo(skb)->nr_frags) {
                        int i;
                        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                                put_page(skb_shinfo(skb)->frags[i].page);
                }

                if (skb_shinfo(skb)->frag_list)
                        skb_drop_fraglist(skb);

                kfree(skb->head);
        }
}

/*
 * Free the memory of an skbuff without cleaning its state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
        struct sk_buff *other;
        atomic_t *fclone_ref;

        skb_release_data(skb);
        switch (skb->fclone) {
        case SKB_FCLONE_UNAVAILABLE:
                kmem_cache_free(skbuff_head_cache, skb);
                break;

        case SKB_FCLONE_ORIG:
                fclone_ref = (atomic_t *) (skb + 2);
                if (atomic_dec_and_test(fclone_ref))
                        kmem_cache_free(skbuff_fclone_cache, skb);
                break;

        case SKB_FCLONE_CLONE:
                fclone_ref = (atomic_t *) (skb + 1);
                other = skb - 1;

                /* The clone portion is available for
                 * fast-cloning again.
                 */
                skb->fclone = SKB_FCLONE_UNAVAILABLE;

                if (atomic_dec_and_test(fclone_ref))
                        kmem_cache_free(skbuff_fclone_cache, other);
                break;
        }
}

/**
 * __kfree_skb - private function
 * @skb: buffer
 *
 * Free an sk_buff. Release anything attached to the buffer.
 * Clean the state. This is an internal helper function. Users should
 * always call kfree_skb().
 */

void __kfree_skb(struct sk_buff *skb)
{
        dst_release(skb->dst);
#ifdef CONFIG_XFRM
        secpath_put(skb->sp);
#endif
        if (skb->destructor) {
                WARN_ON(in_irq());
                skb->destructor(skb);
        }
#ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
#ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(skb->nf_bridge);
#endif
#endif
/* XXX: IS this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
        skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = 0;
#endif
#endif

        kfree_skbmem(skb);
}

/**
 * skb_clone - duplicate an sk_buff
 * @skb: buffer to clone
 * @gfp_mask: allocation priority
 *
 * Duplicate an &sk_buff. The new one is not owned by a socket. Both
 * copies share the same packet data but not structure. The new
 * buffer has a reference count of 1. If the allocation fails the
 * function returns %NULL otherwise the new buffer is returned.
 *
 * If this function is called from an interrupt @gfp_mask must be
 * %GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
        struct sk_buff *n;

        n = skb + 1;
        if (skb->fclone == SKB_FCLONE_ORIG &&
            n->fclone == SKB_FCLONE_UNAVAILABLE) {
                atomic_t *fclone_ref = (atomic_t *) (n + 1);
                n->fclone = SKB_FCLONE_CLONE;
                atomic_inc(fclone_ref);
        } else {
                n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
                if (!n)
                        return NULL;
                n->fclone = SKB_FCLONE_UNAVAILABLE;
        }

#define C(x) n->x = skb->x

        n->next = n->prev = NULL;
        n->sk = NULL;
        C(tstamp);
        C(dev);
        C(h);
        C(nh);
        C(mac);
        C(dst);
        dst_clone(skb->dst);
        C(sp);
#ifdef CONFIG_INET
        secpath_get(skb->sp);
#endif
        memcpy(n->cb, skb->cb, sizeof(skb->cb));
        C(len);
        C(data_len);
        C(csum);
        C(local_df);
        n->cloned = 1;
        n->nohdr = 0;
        C(pkt_type);
        C(ip_summed);
        C(priority);
        C(protocol);
        n->destructor = NULL;
#ifdef CONFIG_NETFILTER
        C(nfmark);
        C(nfct);
        nf_conntrack_get(skb->nfct);
        C(nfctinfo);
#ifdef CONFIG_BRIDGE_NETFILTER
        C(nf_bridge);
        nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#ifdef CONFIG_NET_SCHED
        C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
        n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
        n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
        n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
        C(input_dev);
#endif

#endif
        C(truesize);
        atomic_set(&n->users, 1);
        C(head);
        C(data);
        C(tail);
        C(end);

        atomic_inc(&(skb_shinfo(skb)->dataref));
        skb->cloned = 1;

        return n;
}

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
        /*
         * Shift between the two data areas in bytes
         */
        unsigned long offset = new->data - old->data;

        new->sk = NULL;
        new->dev = old->dev;
        new->priority = old->priority;
        new->protocol = old->protocol;
        new->dst = dst_clone(old->dst);
#ifdef CONFIG_INET
        new->sp = secpath_get(old->sp);
#endif
        new->h.raw = old->h.raw + offset;
        new->nh.raw = old->nh.raw + offset;
        new->mac.raw = old->mac.raw + offset;
        memcpy(new->cb, old->cb, sizeof(old->cb));
        new->local_df = old->local_df;
        new->fclone = SKB_FCLONE_UNAVAILABLE;
        new->pkt_type = old->pkt_type;
        new->tstamp = old->tstamp;
        new->destructor = NULL;
#ifdef CONFIG_NETFILTER
        new->nfmark = old->nfmark;
        new->nfct = old->nfct;
        nf_conntrack_get(old->nfct);
        new->nfctinfo = old->nfctinfo;
#ifdef CONFIG_BRIDGE_NETFILTER
        new->nf_bridge = old->nf_bridge;
        nf_bridge_get(old->nf_bridge);
#endif
#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
        new->tc_verd = old->tc_verd;
#endif
        new->tc_index = old->tc_index;
#endif
        atomic_set(&new->users, 1);
        skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
        skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
}
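
/*
 * Illustrative sketch (not part of the original file): skb_clone() produces
 * a second &sk_buff header that refers to the same packet data, so the data
 * must not be modified through either copy without first taking a private
 * copy (skb_copy()/pskb_copy()).  The send-to-two-consumers usage and the
 * xmit callback below are assumptions made for the example.
 */
#if 0
static int example_send_twice(struct sk_buff *skb,
                              int (*xmit)(struct sk_buff *))
{
        struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

        if (!clone)
                return -ENOMEM;
        /* Both headers reference the same data; each consumer frees its own. */
        xmit(clone);
        return xmit(skb);
}
#endif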

/**
 * skb_copy - create private copy of an sk_buff
 * @skb: buffer to copy
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data. This is used when the
 * caller wishes to modify the data and needs a private copy of the
 * data to alter. Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * As a by-product, this function converts a non-linear &sk_buff into a
 * linear one, so that the &sk_buff becomes completely private and the
 * caller is allowed to modify all the data of the returned buffer. This
 * means that this function is not recommended when only the header is
 * going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
        int headerlen = skb->data - skb->head;
        /*
         * Allocate the copy buffer
         */
        struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
                                      gfp_mask);
        if (!n)
                return NULL;

        /* Set the data pointer */
        skb_reserve(n, headerlen);
        /* Set the tail pointer and length */
        skb_put(n, skb->len);
        n->csum = skb->csum;
        n->ip_summed = skb->ip_summed;

        if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
                BUG();

        copy_skb_header(n, skb);
        return n;
}


/**
 * pskb_copy - create copy of an sk_buff with private head.
 * @skb: buffer to copy
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and part of its data, located
 * in the header. Fragmented data remain shared. This is used when
 * the caller wishes to modify only the header of the &sk_buff and
 * needs a private copy of the header to alter. Returns %NULL on
 * failure or the pointer to the buffer on success.
 * The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
{
        /*
         * Allocate the copy buffer
         */
        struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);

        if (!n)
                goto out;

        /* Set the data pointer */
        skb_reserve(n, skb->data - skb->head);
        /* Set the tail pointer and length */
        skb_put(n, skb_headlen(skb));
        /* Copy the bytes */
        memcpy(n->data, skb->data, n->len);
        n->csum = skb->csum;
        n->ip_summed = skb->ip_summed;

        n->data_len = skb->data_len;
        n->len = skb->len;

        if (skb_shinfo(skb)->nr_frags) {
                int i;

                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
                        get_page(skb_shinfo(n)->frags[i].page);
                }
                skb_shinfo(n)->nr_frags = i;
        }

        if (skb_shinfo(skb)->frag_list) {
                skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
                skb_clone_fraglist(n);
        }

        copy_skb_header(n, skb);
out:
        return n;
}

/**
 * pskb_expand_head - reallocate header of &sk_buff
 * @skb: buffer to reallocate
 * @nhead: room to add at head
 * @ntail: room to add at tail
 * @gfp_mask: allocation priority
 *
 * Expands (or creates an identical copy, if @nhead and @ntail are zero)
 * the header of @skb. The &sk_buff itself is not changed and MUST have
 * a reference count of 1. Returns zero on success or a negative error
 * code if expansion failed; in the latter case the &sk_buff is not
 * changed.
 *
 * All the pointers pointing into the skb header may change and must be
 * reloaded after a call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                     gfp_t gfp_mask)
{
        int i;
        u8 *data;
        int size = nhead + (skb->end - skb->head) + ntail;
        long off;

        if (skb_shared(skb))
                BUG();

        size = SKB_DATA_ALIGN(size);

        data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
        if (!data)
                goto nodata;

        /* Copy only real data... and, alas, header. This should be
         * optimized for the cases when header is void. */
        memcpy(data + nhead, skb->head, skb->tail - skb->head);
        memcpy(data + size, skb->end, sizeof(struct skb_shared_info));

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                get_page(skb_shinfo(skb)->frags[i].page);

        if (skb_shinfo(skb)->frag_list)
                skb_clone_fraglist(skb);

        skb_release_data(skb);

        off = (data + nhead) - skb->head;

        skb->head = data;
        skb->end = data + size;
        skb->data += off;
        skb->tail += off;
        skb->mac.raw += off;
        skb->h.raw += off;
        skb->nh.raw += off;
        skb->cloned = 0;
        skb->nohdr = 0;
        atomic_set(&skb_shinfo(skb)->dataref, 1);
        return 0;

nodata:
        return -ENOMEM;
}

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
        struct sk_buff *skb2;
        int delta = headroom - skb_headroom(skb);

        if (delta <= 0)
                skb2 = pskb_copy(skb, GFP_ATOMIC);
        else {
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
                                             GFP_ATOMIC)) {
                        kfree_skb(skb2);
                        skb2 = NULL;
                }
        }
        return skb2;
}
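
/*
 * Illustrative sketch (not part of the original file): making sure there is
 * enough writable headroom before pushing an extra header, as an
 * encapsulation path might.  The 8-byte header size is an assumption.
 */
#if 0
static struct sk_buff *example_push_header(struct sk_buff *skb)
{
        if (skb_headroom(skb) < 8 || skb_cloned(skb)) {
                /* Get a copy with fresh headroom; the old skb is released here. */
                struct sk_buff *nskb = skb_realloc_headroom(skb, 8);

                kfree_skb(skb);
                if (!nskb)
                        return NULL;
                skb = nskb;
        }
        memset(skb_push(skb, 8), 0, 8);         /* new header, zeroed for the example */
        return skb;
}
#endif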

/**
 * skb_copy_expand - copy and expand sk_buff
 * @skb: buffer to copy
 * @newheadroom: new free bytes at head
 * @newtailroom: new free bytes at tail
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data and while doing so
 * allocate additional space.
 *
 * This is used when the caller wishes to modify the data and needs a
 * private copy of the data to alter as well as more space for new fields.
 * Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * You must pass %GFP_ATOMIC as the allocation priority if this function
 * is called from an interrupt.
 *
 * BUG ALERT: ip_summed is not copied. Why does this work? Is it used
 * only by netfilter in the cases when checksum is recalculated? --ANK
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
                                int newheadroom, int newtailroom,
                                gfp_t gfp_mask)
{
        /*
         * Allocate the copy buffer
         */
        struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
                                      gfp_mask);
        int head_copy_len, head_copy_off;

        if (!n)
                return NULL;

        skb_reserve(n, newheadroom);

        /* Set the tail pointer and length */
        skb_put(n, skb->len);

        head_copy_len = skb_headroom(skb);
        head_copy_off = 0;
        if (newheadroom <= head_copy_len)
                head_copy_len = newheadroom;
        else
                head_copy_off = newheadroom - head_copy_len;

        /* Copy the linear header and data. */
        if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
                          skb->len + head_copy_len))
                BUG();

        copy_skb_header(n, skb);

        return n;
}

/**
 * skb_pad - zero pad the tail of an skb
 * @skb: buffer to pad
 * @pad: space to pad
 *
 * Ensure that a buffer is followed by a padding area that is zero
 * filled. Used by network drivers which may DMA or transfer data
 * beyond the buffer end onto the wire.
 *
 * May return NULL in out of memory cases.
 */

struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
{
        struct sk_buff *nskb;

        /* If the skbuff is non linear tailroom is always zero.. */
        if (skb_tailroom(skb) >= pad) {
                memset(skb->data + skb->len, 0, pad);
                return skb;
        }

        nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
        kfree_skb(skb);
        if (nskb)
                memset(nskb->data + nskb->len, 0, pad);
        return nskb;
}

/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
 * If realloc==0 and trimming is impossible without change of data,
 * it is BUG().
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
        int offset = skb_headlen(skb);
        int nfrags = skb_shinfo(skb)->nr_frags;
        int i;

        for (i = 0; i < nfrags; i++) {
                int end = offset + skb_shinfo(skb)->frags[i].size;
                if (end > len) {
                        if (skb_cloned(skb)) {
                                if (!realloc)
                                        BUG();
                                if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
                                        return -ENOMEM;
                        }
                        if (len <= offset) {
                                put_page(skb_shinfo(skb)->frags[i].page);
                                skb_shinfo(skb)->nr_frags--;
                        } else {
                                skb_shinfo(skb)->frags[i].size = len - offset;
                        }
                }
                offset = end;
        }

        if (offset < len) {
                skb->data_len -= skb->len - len;
                skb->len = len;
        } else {
                if (len <= skb_headlen(skb)) {
                        skb->len = len;
                        skb->data_len = 0;
                        skb->tail = skb->data + len;
                        if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
                                skb_drop_fraglist(skb);
                } else {
                        skb->data_len -= skb->len - len;
                        skb->len = len;
                }
        }

        return 0;
}
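
/*
 * Illustrative sketch (not part of the original file): how a driver might
 * use skb_pad() above to pad a short frame up to the Ethernet minimum
 * (ETH_ZLEN, from <linux/if_ether.h>) before handing it to hardware.
 * Error handling is reduced to the bare minimum for the example.
 */
#if 0
static struct sk_buff *example_pad_to_min(struct sk_buff *skb)
{
        if (skb->len < ETH_ZLEN) {
                /*
                 * skb_pad() may replace the skb; the original is freed and
                 * NULL is returned if the reallocation fails.  On success
                 * the bytes after skb->len are guaranteed to be zero.
                 */
                skb = skb_pad(skb, ETH_ZLEN - skb->len);
        }
        return skb;
}
#endif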

/**
 * __pskb_pull_tail - advance tail of skb header
 * @skb: buffer to reallocate
 * @delta: number of bytes to advance tail
 *
 * The function makes sense only on a fragmented &sk_buff: it expands
 * the header, moving its tail forward and copying the necessary data
 * from the fragmented part.
 *
 * &sk_buff MUST have a reference count of 1.
 *
 * Returns %NULL (and the &sk_buff does not change) if the pull failed,
 * or the value of the new tail of the skb in the case of success.
 *
 * All the pointers pointing into the skb header may change and must be
 * reloaded after a call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
        /* If skb has not enough free space at tail, get new one
         * plus 128 bytes for future expansions. If we have enough
         * room at tail, reallocate without expansion only if skb is cloned.
         */
        int i, k, eat = (skb->tail + delta) - skb->end;

        if (eat > 0 || skb_cloned(skb)) {
                if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
                                     GFP_ATOMIC))
                        return NULL;
        }

        if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
                BUG();

        /* Optimization: no fragments, no reasons to preestimate
         * size of pulled pages. Superb.
         */
        if (!skb_shinfo(skb)->frag_list)
                goto pull_pages;

        /* Estimate size of pulled pages. */
        eat = delta;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                if (skb_shinfo(skb)->frags[i].size >= eat)
                        goto pull_pages;
                eat -= skb_shinfo(skb)->frags[i].size;
        }

        /* If we need to update the frag list, we are in trouble.
         * Certainly, it is possible to add an offset to the skb data,
         * but taking into account that pulling is expected to
         * be a very rare operation, it is worth fighting against
         * further bloating of the skb head and crucifying ourselves
         * here instead. Pure masochism, indeed. 8)8)
         */
        if (eat) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;
                struct sk_buff *clone = NULL;
                struct sk_buff *insp = NULL;

                do {
                        if (!list)
                                BUG();

                        if (list->len <= eat) {
                                /* Eaten as whole. */
                                eat -= list->len;
                                list = list->next;
                                insp = list;
                        } else {
                                /* Eaten partially. */

                                if (skb_shared(list)) {
                                        /* Sucks! We need to fork list. :-( */
                                        clone = skb_clone(list, GFP_ATOMIC);
                                        if (!clone)
                                                return NULL;
                                        insp = list->next;
                                        list = clone;
                                } else {
                                        /* This may be pulled without
                                         * problems. */
                                        insp = list;
                                }
                                if (!pskb_pull(list, eat)) {
                                        if (clone)
                                                kfree_skb(clone);
                                        return NULL;
                                }
                                break;
                        }
                } while (eat);

                /* Free pulled out fragments. */
                while ((list = skb_shinfo(skb)->frag_list) != insp) {
                        skb_shinfo(skb)->frag_list = list->next;
                        kfree_skb(list);
                }
                /* And insert new clone at head. */
                if (clone) {
                        clone->next = list;
                        skb_shinfo(skb)->frag_list = clone;
                }
        }
        /* Success! Now we may commit changes to skb data. */

pull_pages:
        eat = delta;
        k = 0;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                if (skb_shinfo(skb)->frags[i].size <= eat) {
                        put_page(skb_shinfo(skb)->frags[i].page);
                        eat -= skb_shinfo(skb)->frags[i].size;
                } else {
                        skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
                        if (eat) {
                                skb_shinfo(skb)->frags[k].page_offset += eat;
                                skb_shinfo(skb)->frags[k].size -= eat;
                                eat = 0;
                        }
                        k++;
                }
        }
        skb_shinfo(skb)->nr_frags = k;

        skb->tail += delta;
        skb->data_len -= delta;

        return skb->tail;
}
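
/*
 * Illustrative sketch (not part of the original file): protocol code rarely
 * calls __pskb_pull_tail() directly; it usually goes through
 * pskb_may_pull() to make sure the first bytes it wants to parse are in the
 * linear header.  The 20-byte header length below is an assumption.
 */
#if 0
static int example_parse_header(struct sk_buff *skb)
{
        /* Linearize at least 20 bytes; this may reallocate the head. */
        if (!pskb_may_pull(skb, 20))
                return -EINVAL;

        /* skb->data is now valid for 20 contiguous bytes. */
        return skb->data[0] >> 4;               /* e.g. an IP-style version field */
}
#endif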

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
        int i, copy;
        int start = skb_headlen(skb);

        if (offset > (int)skb->len - len)
                goto fault;

        /* Copy header. */
        if ((copy = start - offset) > 0) {
                if (copy > len)
                        copy = len;
                memcpy(to, skb->data + offset, copy);
                if ((len -= copy) == 0)
                        return 0;
                offset += copy;
                to += copy;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;

                BUG_TRAP(start <= offset + len);

                end = start + skb_shinfo(skb)->frags[i].size;
                if ((copy = end - offset) > 0) {
                        u8 *vaddr;

                        if (copy > len)
                                copy = len;

                        vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
                        memcpy(to,
                               vaddr + skb_shinfo(skb)->frags[i].page_offset +
                               offset - start, copy);
                        kunmap_skb_frag(vaddr);

                        if ((len -= copy) == 0)
                                return 0;
                        offset += copy;
                        to += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;

                for (; list; list = list->next) {
                        int end;

                        BUG_TRAP(start <= offset + len);

                        end = start + list->len;
                        if ((copy = end - offset) > 0) {
                                if (copy > len)
                                        copy = len;
                                if (skb_copy_bits(list, offset - start,
                                                  to, copy))
                                        goto fault;
                                if ((len -= copy) == 0)
                                        return 0;
                                offset += copy;
                                to += copy;
                        }
                        start = end;
                }
        }
        if (!len)
                return 0;

fault:
        return -EFAULT;
}

/**
 * skb_store_bits - store bits from kernel buffer to skb
 * @skb: destination buffer
 * @offset: offset in destination
 * @from: source buffer
 * @len: number of bytes to copy
 *
 * Copy the specified number of bytes from the source buffer to the
 * destination skb. This function handles all the messy bits of
 * traversing fragment lists and such.
 */

int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
{
        int i, copy;
        int start = skb_headlen(skb);

        if (offset > (int)skb->len - len)
                goto fault;

        if ((copy = start - offset) > 0) {
                if (copy > len)
                        copy = len;
                memcpy(skb->data + offset, from, copy);
                if ((len -= copy) == 0)
                        return 0;
                offset += copy;
                from += copy;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
                int end;

                BUG_TRAP(start <= offset + len);

                end = start + frag->size;
                if ((copy = end - offset) > 0) {
                        u8 *vaddr;

                        if (copy > len)
                                copy = len;

                        vaddr = kmap_skb_frag(frag);
                        memcpy(vaddr + frag->page_offset + offset - start,
                               from, copy);
                        kunmap_skb_frag(vaddr);

                        if ((len -= copy) == 0)
                                return 0;
                        offset += copy;
                        from += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;

                for (; list; list = list->next) {
                        int end;

                        BUG_TRAP(start <= offset + len);

                        end = start + list->len;
                        if ((copy = end - offset) > 0) {
                                if (copy > len)
                                        copy = len;
                                if (skb_store_bits(list, offset - start,
                                                   from, copy))
                                        goto fault;
                                if ((len -= copy) == 0)
                                        return 0;
                                offset += copy;
                                from += copy;
                        }
                        start = end;
                }
        }
        if (!len)
                return 0;

fault:
        return -EFAULT;
}

EXPORT_SYMBOL(skb_store_bits);
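
/*
 * Illustrative sketch (not part of the original file): skb_copy_bits()
 * copies from a possibly non-linear skb into a flat kernel buffer, which
 * makes it a safe way to peek at data that may live in page fragments.
 * The 8-byte peek length is an assumption.
 */
#if 0
static int example_peek(const struct sk_buff *skb, int offset)
{
        u8 buf[8];

        if (skb_copy_bits(skb, offset, buf, sizeof(buf)))
                return -EFAULT;                 /* offset/len outside the skb */
        return buf[0];
}
#endif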

/* Checksum skb data. */

unsigned int skb_checksum(const struct sk_buff *skb, int offset,
                          int len, unsigned int csum)
{
        int start = skb_headlen(skb);
        int i, copy = start - offset;
        int pos = 0;

        /* Checksum header. */
        if (copy > 0) {
                if (copy > len)
                        copy = len;
                csum = csum_partial(skb->data + offset, copy, csum);
                if ((len -= copy) == 0)
                        return csum;
                offset += copy;
                pos = copy;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;

                BUG_TRAP(start <= offset + len);

                end = start + skb_shinfo(skb)->frags[i].size;
                if ((copy = end - offset) > 0) {
                        unsigned int csum2;
                        u8 *vaddr;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                        if (copy > len)
                                copy = len;
                        vaddr = kmap_skb_frag(frag);
                        csum2 = csum_partial(vaddr + frag->page_offset +
                                             offset - start, copy, 0);
                        kunmap_skb_frag(vaddr);
                        csum = csum_block_add(csum, csum2, pos);
                        if (!(len -= copy))
                                return csum;
                        offset += copy;
                        pos += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;

                for (; list; list = list->next) {
                        int end;

                        BUG_TRAP(start <= offset + len);

                        end = start + list->len;
                        if ((copy = end - offset) > 0) {
                                unsigned int csum2;
                                if (copy > len)
                                        copy = len;
                                csum2 = skb_checksum(list, offset - start,
                                                     copy, 0);
                                csum = csum_block_add(csum, csum2, pos);
                                if ((len -= copy) == 0)
                                        return csum;
                                offset += copy;
                                pos += copy;
                        }
                        start = end;
                }
        }
        if (len)
                BUG();

        return csum;
}
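
/*
 * Illustrative sketch (not part of the original file): folding the 32-bit
 * partial sum returned by skb_checksum() into the 16-bit ones-complement
 * form used in protocol headers.  Checksumming the whole skb from offset 0
 * with an initial sum of 0 is an assumption made for the example.
 */
#if 0
static u16 example_checksum(const struct sk_buff *skb)
{
        unsigned int csum = skb_checksum(skb, 0, skb->len, 0);

        return csum_fold(csum);                 /* 16-bit ones-complement result */
}
#endif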

/* Both of above in one bottle. */

unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
                                    u8 *to, int len, unsigned int csum)
{
        int start = skb_headlen(skb);
        int i, copy = start - offset;
        int pos = 0;

        /* Copy header. */
        if (copy > 0) {
                if (copy > len)
                        copy = len;
                csum = csum_partial_copy_nocheck(skb->data + offset, to,
                                                 copy, csum);
                if ((len -= copy) == 0)
                        return csum;
                offset += copy;
                to += copy;
                pos = copy;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;

                BUG_TRAP(start <= offset + len);

                end = start + skb_shinfo(skb)->frags[i].size;
                if ((copy = end - offset) > 0) {
                        unsigned int csum2;
                        u8 *vaddr;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                        if (copy > len)
                                copy = len;
                        vaddr = kmap_skb_frag(frag);
                        csum2 = csum_partial_copy_nocheck(vaddr +
                                                          frag->page_offset +
                                                          offset - start, to,
                                                          copy, 0);
                        kunmap_skb_frag(vaddr);
                        csum = csum_block_add(csum, csum2, pos);
                        if (!(len -= copy))
                                return csum;
                        offset += copy;
                        to += copy;
                        pos += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;

                for (; list; list = list->next) {
                        unsigned int csum2;
                        int end;

                        BUG_TRAP(start <= offset + len);

                        end = start + list->len;
                        if ((copy = end - offset) > 0) {
                                if (copy > len)
                                        copy = len;
                                csum2 = skb_copy_and_csum_bits(list,
                                                               offset - start,
                                                               to, copy, 0);
                                csum = csum_block_add(csum, csum2, pos);
                                if ((len -= copy) == 0)
                                        return csum;
                                offset += copy;
                                to += copy;
                                pos += copy;
                        }
                        start = end;
                }
        }
        if (len)
                BUG();
        return csum;
}

void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
        unsigned int csum;
        long csstart;

        if (skb->ip_summed == CHECKSUM_HW)
                csstart = skb->h.raw - skb->data;
        else
                csstart = skb_headlen(skb);

        if (csstart > skb_headlen(skb))
                BUG();

        memcpy(to, skb->data, csstart);

        csum = 0;
        if (csstart != skb->len)
                csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
                                              skb->len - csstart, 0);

        if (skb->ip_summed == CHECKSUM_HW) {
                long csstuff = csstart + skb->csum;

                *((unsigned short *)(to + csstuff)) = csum_fold(csum);
        }
}

/**
 * skb_dequeue - remove from the head of the queue
 * @list: list to dequeue from
 *
 * Remove the head of the list. The list lock is taken so the function
 * may be used safely with other locking list functions. The head item is
 * returned or %NULL if the list is empty.
 */

struct sk_buff *skb_dequeue(struct sk_buff_head *list)
{
        unsigned long flags;
        struct sk_buff *result;

        spin_lock_irqsave(&list->lock, flags);
        result = __skb_dequeue(list);
        spin_unlock_irqrestore(&list->lock, flags);
        return result;
}

/**
 * skb_dequeue_tail - remove from the tail of the queue
 * @list: list to dequeue from
 *
 * Remove the tail of the list. The list lock is taken so the function
 * may be used safely with other locking list functions. The tail item is
 * returned or %NULL if the list is empty.
 */
struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
{
        unsigned long flags;
        struct sk_buff *result;

        spin_lock_irqsave(&list->lock, flags);
        result = __skb_dequeue_tail(list);
        spin_unlock_irqrestore(&list->lock, flags);
        return result;
}

/**
 * skb_queue_purge - empty a list
 * @list: list to empty
 *
 * Delete all buffers on an &sk_buff list. Each buffer is removed from
 * the list and one reference dropped. This function takes the list
 * lock and is atomic with respect to other list locking functions.
 */
void skb_queue_purge(struct sk_buff_head *list)
{
        struct sk_buff *skb;
        while ((skb = skb_dequeue(list)) != NULL)
                kfree_skb(skb);
}

/**
 * skb_queue_head - queue a buffer at the list head
 * @list: list to use
 * @newsk: buffer to queue
 *
 * Queue a buffer at the start of the list. This function takes the
 * list lock and can be used safely with other locking &sk_buff functions.
 *
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_queue_head(list, newsk);
        spin_unlock_irqrestore(&list->lock, flags);
}

/**
 * skb_queue_tail - queue a buffer at the list tail
 * @list: list to use
 * @newsk: buffer to queue
 *
 * Queue a buffer at the tail of the list. This function takes the
 * list lock and can be used safely with other locking &sk_buff functions.
 *
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_queue_tail(list, newsk);
        spin_unlock_irqrestore(&list->lock, flags);
}
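
/*
 * Illustrative sketch (not part of the original file): the usual
 * producer/consumer pattern built on &sk_buff_head.  The locked
 * skb_queue_tail()/skb_dequeue() pair makes the list safe to share between
 * contexts; the queue and its users below are assumptions for the example.
 */
#if 0
static struct sk_buff_head example_queue;

static void example_queue_init(void)
{
        skb_queue_head_init(&example_queue);
}

static void example_producer(struct sk_buff *skb)
{
        skb_queue_tail(&example_queue, skb);    /* takes the list lock */
}

static void example_consumer(void)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(&example_queue)) != NULL)
                kfree_skb(skb);                 /* or hand it on to a protocol */
}
#endif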

/**
 * skb_unlink - remove a buffer from a list
 * @skb: buffer to remove
 * @list: list to use
 *
 * Remove a packet from a list. The list locks are taken and this
 * function is atomic with respect to other list locked calls.
 *
 * You must know what list the SKB is on.
 */
void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_unlink(skb, list);
        spin_unlock_irqrestore(&list->lock, flags);
}

/**
 * skb_append - append a buffer
 * @old: buffer to insert after
 * @newsk: buffer to insert
 * @list: list to use
 *
 * Place a packet after a given packet in a list. The list locks are taken
 * and this function is atomic with respect to other list locked calls.
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_append(old, newsk, list);
        spin_unlock_irqrestore(&list->lock, flags);
}


/**
 * skb_insert - insert a buffer
 * @old: buffer to insert before
 * @newsk: buffer to insert
 * @list: list to use
 *
 * Place a packet before a given packet in a list. The list locks are
 * taken and this function is atomic with respect to other list locked
 * calls.
 *
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_insert(newsk, old->prev, old, list);
        spin_unlock_irqrestore(&list->lock, flags);
}

#if 0
/*
 * Tune the memory allocator for a new MTU size.
 */
void skb_add_mtu(int mtu)
{
        /* Must match allocation in alloc_skb */
        mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

        kmem_add_cache_size(mtu);
}
#endif

static inline void skb_split_inside_header(struct sk_buff *skb,
                                           struct sk_buff *skb1,
                                           const u32 len, const int pos)
{
        int i;

        memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);

        /* And move data appendix as is. */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

        skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
        skb_shinfo(skb)->nr_frags = 0;
        skb1->data_len = skb->data_len;
        skb1->len += skb1->data_len;
        skb->data_len = 0;
        skb->len = len;
        skb->tail = skb->data + len;
}

static inline void skb_split_no_header(struct sk_buff *skb,
                                       struct sk_buff *skb1,
                                       const u32 len, int pos)
{
        int i, k = 0;
        const int nfrags = skb_shinfo(skb)->nr_frags;

        skb_shinfo(skb)->nr_frags = 0;
        skb1->len = skb1->data_len = skb->len - len;
        skb->len = len;
        skb->data_len = len - pos;

        for (i = 0; i < nfrags; i++) {
                int size = skb_shinfo(skb)->frags[i].size;

                if (pos + size > len) {
                        skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

                        if (pos < len) {
                                /* Split frag.
                                 * We have two variants in this case:
                                 * 1. Move all the frag to the second
                                 *    part, if it is possible. F.e.
                                 *    this approach is mandatory for TUX,
                                 *    where splitting is expensive.
                                 * 2. Split the frag accurately. We do that here.
                                 */
                                get_page(skb_shinfo(skb)->frags[i].page);
                                skb_shinfo(skb1)->frags[0].page_offset += len - pos;
                                skb_shinfo(skb1)->frags[0].size -= len - pos;
                                skb_shinfo(skb)->frags[i].size = len - pos;
                                skb_shinfo(skb)->nr_frags++;
                        }
                        k++;
                } else
                        skb_shinfo(skb)->nr_frags++;
                pos += size;
        }
        skb_shinfo(skb1)->nr_frags = k;
}

/**
 * skb_split - Split fragmented skb to two parts at length len.
 * @skb: the buffer to split
 * @skb1: the buffer to receive the second part
 * @len: new length for skb
 */
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
        int pos = skb_headlen(skb);

        if (len < pos)  /* Split line is inside header. */
                skb_split_inside_header(skb, skb1, len, pos);
        else            /* Second chunk has no header, nothing to copy. */
                skb_split_no_header(skb, skb1, len, pos);
}

/**
 * skb_prepare_seq_read - Prepare a sequential read of skb data
 * @skb: the buffer to read
 * @from: lower offset of data to be read
 * @to: upper offset of data to be read
 * @st: state variable
 *
 * Initializes the specified state variable. Must be called before
 * invoking skb_seq_read() for the first time.
 */
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
                          unsigned int to, struct skb_seq_state *st)
{
        st->lower_offset = from;
        st->upper_offset = to;
        st->root_skb = st->cur_skb = skb;
        st->frag_idx = st->stepped_offset = 0;
        st->frag_data = NULL;
}

/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at @consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to @data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. @consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note: The size of each block of data returned can be arbitrary;
 *       this limitation is the cost of zerocopy sequential
 *       reads of potentially non-linear data.
 *
 * Note: Fragment lists within fragments are not implemented
 *       at the moment; state->root_skb could be replaced with
 *       a stack for this purpose.
 */
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
                          struct skb_seq_state *st)
{
        unsigned int block_limit, abs_offset = consumed + st->lower_offset;
        skb_frag_t *frag;

        if (unlikely(abs_offset >= st->upper_offset))
                return 0;

next_skb:
        block_limit = skb_headlen(st->cur_skb);

        if (abs_offset < block_limit) {
                *data = st->cur_skb->data + abs_offset;
                return block_limit - abs_offset;
        }

        if (st->frag_idx == 0 && !st->frag_data)
                st->stepped_offset += skb_headlen(st->cur_skb);

        while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
                frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
                block_limit = frag->size + st->stepped_offset;

                if (abs_offset < block_limit) {
                        if (!st->frag_data)
                                st->frag_data = kmap_skb_frag(frag);

                        *data = (u8 *) st->frag_data + frag->page_offset +
                                (abs_offset - st->stepped_offset);

                        return block_limit - abs_offset;
                }

                if (st->frag_data) {
                        kunmap_skb_frag(st->frag_data);
                        st->frag_data = NULL;
                }

                st->frag_idx++;
                st->stepped_offset += frag->size;
        }

        if (st->cur_skb->next) {
                st->cur_skb = st->cur_skb->next;
                st->frag_idx = 0;
                goto next_skb;
        } else if (st->root_skb == st->cur_skb &&
                   skb_shinfo(st->root_skb)->frag_list) {
                st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
                goto next_skb;
        }

        return 0;
}
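
/*
 * Illustrative sketch (not part of the original file): the intended calling
 * sequence for the sequential reader above: prepare, loop over the returned
 * blocks while tracking how much has been consumed, and call
 * skb_abort_seq_read() only if the walk stops before skb_seq_read()
 * returns 0.  The byte-summing body is an assumption for the example.
 */
#if 0
static unsigned int example_sum_bytes(struct sk_buff *skb, unsigned int len)
{
        struct skb_seq_state st;
        unsigned int consumed = 0, sum = 0, avail, i;
        const u8 *data;

        skb_prepare_seq_read(skb, 0, len, &st);
        while ((avail = skb_seq_read(consumed, &data, &st)) != 0) {
                for (i = 0; i < avail; i++)
                        sum += data[i];
                consumed += avail;
        }
        /* The read ran to completion, so no skb_abort_seq_read() is needed. */
        return sum;
}
#endif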

/**
 * skb_abort_seq_read - Abort a sequential read of skb data
 * @st: state variable
 *
 * Must be called if skb_seq_read() was not called until it
 * returned 0.
 */
void skb_abort_seq_read(struct skb_seq_state *st)
{
        if (st->frag_data)
                kunmap_skb_frag(st->frag_data);
}

#define TS_SKB_CB(state)        ((struct skb_seq_state *) &((state)->cb))

static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
                                          struct ts_config *conf,
                                          struct ts_state *state)
{
        return skb_seq_read(offset, text, TS_SKB_CB(state));
}

static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
{
        skb_abort_seq_read(TS_SKB_CB(state));
}

/**
 * skb_find_text - Find a text pattern in skb data
 * @skb: the buffer to look in
 * @from: search offset
 * @to: search limit
 * @config: textsearch configuration
 * @state: uninitialized textsearch state variable
 *
 * Finds a pattern in the skb data according to the specified
 * textsearch configuration. Use textsearch_next() to retrieve
 * subsequent occurrences of the pattern. Returns the offset
 * to the first occurrence or UINT_MAX if no match was found.
 */
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
                           unsigned int to, struct ts_config *config,
                           struct ts_state *state)
{
        config->get_next_block = skb_ts_get_next_block;
        config->finish = skb_ts_finish;

        skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));

        return textsearch_find(config, state);
}

void __init skb_init(void)
{
        skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
                                              sizeof(struct sk_buff),
                                              0,
                                              SLAB_HWCACHE_ALIGN,
                                              NULL, NULL);
        if (!skbuff_head_cache)
                panic("cannot create skbuff cache");

        skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
                                                (2 * sizeof(struct sk_buff)) +
                                                sizeof(atomic_t),
                                                0,
                                                SLAB_HWCACHE_ALIGN,
                                                NULL, NULL);
        if (!skbuff_fclone_cache)
                panic("cannot create skbuff cache");
}

EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
EXPORT_SYMBOL(skb_clone);
EXPORT_SYMBOL(skb_clone_fraglist);
EXPORT_SYMBOL(skb_copy);
EXPORT_SYMBOL(skb_copy_and_csum_bits);
EXPORT_SYMBOL(skb_copy_and_csum_dev);
EXPORT_SYMBOL(skb_copy_bits);
EXPORT_SYMBOL(skb_copy_expand);
EXPORT_SYMBOL(skb_over_panic);
EXPORT_SYMBOL(skb_pad);
EXPORT_SYMBOL(skb_realloc_headroom);
EXPORT_SYMBOL(skb_under_panic);
EXPORT_SYMBOL(skb_dequeue);
EXPORT_SYMBOL(skb_dequeue_tail);
EXPORT_SYMBOL(skb_insert);
EXPORT_SYMBOL(skb_queue_purge);
EXPORT_SYMBOL(skb_queue_head);
EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
EXPORT_SYMBOL(skb_prepare_seq_read);
EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);