1 /* 2 * Routines having to do with the 'struct sk_buff' memory handlers. 3 * 4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 5 * Florian La Roche <rzsfl@rz.uni-sb.de> 6 * 7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ 8 * 9 * Fixes: 10 * Alan Cox : Fixed the worst of the load 11 * balancer bugs. 12 * Dave Platt : Interrupt stacking fix. 13 * Richard Kooijman : Timestamp fixes. 14 * Alan Cox : Changed buffer format. 15 * Alan Cox : destructor hook for AF_UNIX etc. 16 * Linus Torvalds : Better skb_clone. 17 * Alan Cox : Added skb_copy. 18 * Alan Cox : Added all the changed routines Linus 19 * only put in the headers 20 * Ray VanTassle : Fixed --skb->lock in free 21 * Alan Cox : skb_copy copy arp field 22 * Andi Kleen : slabified it. 23 * Robert Olsson : Removed skb_head_pool 24 * 25 * NOTE: 26 * The __skb_ routines should be called with interrupts 27 * disabled, or you better be *real* sure that the operation is atomic 28 * with respect to whatever list is being frobbed (e.g. via lock_sock() 29 * or via disabling bottom half handlers, etc). 30 * 31 * This program is free software; you can redistribute it and/or 32 * modify it under the terms of the GNU General Public License 33 * as published by the Free Software Foundation; either version 34 * 2 of the License, or (at your option) any later version. 35 */ 36 37 /* 38 * The functions in this file will not compile correctly with gcc 2.4.x 39 */ 40 41 #include <linux/config.h> 42 #include <linux/module.h> 43 #include <linux/types.h> 44 #include <linux/kernel.h> 45 #include <linux/sched.h> 46 #include <linux/mm.h> 47 #include <linux/interrupt.h> 48 #include <linux/in.h> 49 #include <linux/inet.h> 50 #include <linux/slab.h> 51 #include <linux/netdevice.h> 52 #ifdef CONFIG_NET_CLS_ACT 53 #include <net/pkt_sched.h> 54 #endif 55 #include <linux/string.h> 56 #include <linux/skbuff.h> 57 #include <linux/cache.h> 58 #include <linux/rtnetlink.h> 59 #include <linux/init.h> 60 #include <linux/highmem.h> 61 62 #include <net/protocol.h> 63 #include <net/dst.h> 64 #include <net/sock.h> 65 #include <net/checksum.h> 66 #include <net/xfrm.h> 67 68 #include <asm/uaccess.h> 69 #include <asm/system.h> 70 71 static kmem_cache_t *skbuff_head_cache; 72 73 /* 74 * Keep out-of-line to prevent kernel bloat. 75 * __builtin_return_address is not used because it is not always 76 * reliable. 77 */ 78 79 /** 80 * skb_over_panic - private function 81 * @skb: buffer 82 * @sz: size 83 * @here: address 84 * 85 * Out of line support code for skb_put(). Not user callable. 86 */ 87 void skb_over_panic(struct sk_buff *skb, int sz, void *here) 88 { 89 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 90 "data:%p tail:%p end:%p dev:%s\n", 91 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 92 skb->dev ? skb->dev->name : "<NULL>"); 93 BUG(); 94 } 95 96 /** 97 * skb_under_panic - private function 98 * @skb: buffer 99 * @sz: size 100 * @here: address 101 * 102 * Out of line support code for skb_push(). Not user callable. 103 */ 104 105 void skb_under_panic(struct sk_buff *skb, int sz, void *here) 106 { 107 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 108 "data:%p tail:%p end:%p dev:%s\n", 109 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 110 skb->dev ? skb->dev->name : "<NULL>"); 111 BUG(); 112 } 113 114 /* Allocate a new skbuff. We do this ourselves so we can fill in a few 115 * 'private' fields and also do memory statistics to find all the 116 * [BEEP] leaks. 117 * 118 */ 119 120 /** 121 * alloc_skb - allocate a network buffer 122 * @size: size to allocate 123 * @gfp_mask: allocation mask 124 * 125 * Allocate a new &sk_buff. The returned buffer has no headroom and a 126 * tail room of size bytes. The object has a reference count of one. 127 * The return is the buffer. On a failure the return is %NULL. 128 * 129 * Buffers may only be allocated from interrupts using a @gfp_mask of 130 * %GFP_ATOMIC. 131 */ 132 struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) 133 { 134 struct sk_buff *skb; 135 u8 *data; 136 137 /* Get the HEAD */ 138 skb = kmem_cache_alloc(skbuff_head_cache, 139 gfp_mask & ~__GFP_DMA); 140 if (!skb) 141 goto out; 142 143 /* Get the DATA. Size must match skb_add_mtu(). */ 144 size = SKB_DATA_ALIGN(size); 145 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 146 if (!data) 147 goto nodata; 148 149 memset(skb, 0, offsetof(struct sk_buff, truesize)); 150 skb->truesize = size + sizeof(struct sk_buff); 151 atomic_set(&skb->users, 1); 152 skb->head = data; 153 skb->data = data; 154 skb->tail = data; 155 skb->end = data + size; 156 157 atomic_set(&(skb_shinfo(skb)->dataref), 1); 158 skb_shinfo(skb)->nr_frags = 0; 159 skb_shinfo(skb)->tso_size = 0; 160 skb_shinfo(skb)->tso_segs = 0; 161 skb_shinfo(skb)->frag_list = NULL; 162 out: 163 return skb; 164 nodata: 165 kmem_cache_free(skbuff_head_cache, skb); 166 skb = NULL; 167 goto out; 168 } 169 170 /** 171 * alloc_skb_from_cache - allocate a network buffer 172 * @cp: kmem_cache from which to allocate the data area 173 * (object size must be big enough for @size bytes + skb overheads) 174 * @size: size to allocate 175 * @gfp_mask: allocation mask 176 * 177 * Allocate a new &sk_buff. The returned buffer has no headroom and 178 * tail room of size bytes. The object has a reference count of one. 179 * The return is the buffer. On a failure the return is %NULL. 180 * 181 * Buffers may only be allocated from interrupts using a @gfp_mask of 182 * %GFP_ATOMIC. 183 */ 184 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 185 unsigned int size, 186 unsigned int __nocast gfp_mask) 187 { 188 struct sk_buff *skb; 189 u8 *data; 190 191 /* Get the HEAD */ 192 skb = kmem_cache_alloc(skbuff_head_cache, 193 gfp_mask & ~__GFP_DMA); 194 if (!skb) 195 goto out; 196 197 /* Get the DATA. */ 198 size = SKB_DATA_ALIGN(size); 199 data = kmem_cache_alloc(cp, gfp_mask); 200 if (!data) 201 goto nodata; 202 203 memset(skb, 0, offsetof(struct sk_buff, truesize)); 204 skb->truesize = size + sizeof(struct sk_buff); 205 atomic_set(&skb->users, 1); 206 skb->head = data; 207 skb->data = data; 208 skb->tail = data; 209 skb->end = data + size; 210 211 atomic_set(&(skb_shinfo(skb)->dataref), 1); 212 skb_shinfo(skb)->nr_frags = 0; 213 skb_shinfo(skb)->tso_size = 0; 214 skb_shinfo(skb)->tso_segs = 0; 215 skb_shinfo(skb)->frag_list = NULL; 216 out: 217 return skb; 218 nodata: 219 kmem_cache_free(skbuff_head_cache, skb); 220 skb = NULL; 221 goto out; 222 } 223 224 225 static void skb_drop_fraglist(struct sk_buff *skb) 226 { 227 struct sk_buff *list = skb_shinfo(skb)->frag_list; 228 229 skb_shinfo(skb)->frag_list = NULL; 230 231 do { 232 struct sk_buff *this = list; 233 list = list->next; 234 kfree_skb(this); 235 } while (list); 236 } 237 238 static void skb_clone_fraglist(struct sk_buff *skb) 239 { 240 struct sk_buff *list; 241 242 for (list = skb_shinfo(skb)->frag_list; list; list = list->next) 243 skb_get(list); 244 } 245 246 void skb_release_data(struct sk_buff *skb) 247 { 248 if (!skb->cloned || 249 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 250 &skb_shinfo(skb)->dataref)) { 251 if (skb_shinfo(skb)->nr_frags) { 252 int i; 253 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 254 put_page(skb_shinfo(skb)->frags[i].page); 255 } 256 257 if (skb_shinfo(skb)->frag_list) 258 skb_drop_fraglist(skb); 259 260 kfree(skb->head); 261 } 262 } 263 264 /* 265 * Free an skbuff by memory without cleaning the state. 266 */ 267 void kfree_skbmem(struct sk_buff *skb) 268 { 269 skb_release_data(skb); 270 kmem_cache_free(skbuff_head_cache, skb); 271 } 272 273 /** 274 * __kfree_skb - private function 275 * @skb: buffer 276 * 277 * Free an sk_buff. Release anything attached to the buffer. 278 * Clean the state. This is an internal helper function. Users should 279 * always call kfree_skb 280 */ 281 282 void __kfree_skb(struct sk_buff *skb) 283 { 284 BUG_ON(skb->list != NULL); 285 286 dst_release(skb->dst); 287 #ifdef CONFIG_XFRM 288 secpath_put(skb->sp); 289 #endif 290 if (skb->destructor) { 291 WARN_ON(in_irq()); 292 skb->destructor(skb); 293 } 294 #ifdef CONFIG_NETFILTER 295 nf_conntrack_put(skb->nfct); 296 #ifdef CONFIG_BRIDGE_NETFILTER 297 nf_bridge_put(skb->nf_bridge); 298 #endif 299 #endif 300 /* XXX: IS this still necessary? - JHS */ 301 #ifdef CONFIG_NET_SCHED 302 skb->tc_index = 0; 303 #ifdef CONFIG_NET_CLS_ACT 304 skb->tc_verd = 0; 305 skb->tc_classid = 0; 306 #endif 307 #endif 308 309 kfree_skbmem(skb); 310 } 311 312 /** 313 * skb_clone - duplicate an sk_buff 314 * @skb: buffer to clone 315 * @gfp_mask: allocation priority 316 * 317 * Duplicate an &sk_buff. The new one is not owned by a socket. Both 318 * copies share the same packet data but not structure. The new 319 * buffer has a reference count of 1. If the allocation fails the 320 * function returns %NULL otherwise the new buffer is returned. 321 * 322 * If this function is called from an interrupt gfp_mask() must be 323 * %GFP_ATOMIC. 324 */ 325 326 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) 327 { 328 struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 329 330 if (!n) 331 return NULL; 332 333 #define C(x) n->x = skb->x 334 335 n->next = n->prev = NULL; 336 n->list = NULL; 337 n->sk = NULL; 338 C(stamp); 339 C(dev); 340 C(real_dev); 341 C(h); 342 C(nh); 343 C(mac); 344 C(dst); 345 dst_clone(skb->dst); 346 C(sp); 347 #ifdef CONFIG_INET 348 secpath_get(skb->sp); 349 #endif 350 memcpy(n->cb, skb->cb, sizeof(skb->cb)); 351 C(len); 352 C(data_len); 353 C(csum); 354 C(local_df); 355 n->cloned = 1; 356 n->nohdr = 0; 357 C(pkt_type); 358 C(ip_summed); 359 C(priority); 360 C(protocol); 361 n->destructor = NULL; 362 #ifdef CONFIG_NETFILTER 363 C(nfmark); 364 C(nfcache); 365 C(nfct); 366 nf_conntrack_get(skb->nfct); 367 C(nfctinfo); 368 #ifdef CONFIG_BRIDGE_NETFILTER 369 C(nf_bridge); 370 nf_bridge_get(skb->nf_bridge); 371 #endif 372 #endif /*CONFIG_NETFILTER*/ 373 #if defined(CONFIG_HIPPI) 374 C(private); 375 #endif 376 #ifdef CONFIG_NET_SCHED 377 C(tc_index); 378 #ifdef CONFIG_NET_CLS_ACT 379 n->tc_verd = SET_TC_VERD(skb->tc_verd,0); 380 n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd); 381 n->tc_verd = CLR_TC_MUNGED(skb->tc_verd); 382 C(input_dev); 383 C(tc_classid); 384 #endif 385 386 #endif 387 C(truesize); 388 atomic_set(&n->users, 1); 389 C(head); 390 C(data); 391 C(tail); 392 C(end); 393 394 atomic_inc(&(skb_shinfo(skb)->dataref)); 395 skb->cloned = 1; 396 397 return n; 398 } 399 400 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 401 { 402 /* 403 * Shift between the two data areas in bytes 404 */ 405 unsigned long offset = new->data - old->data; 406 407 new->list = NULL; 408 new->sk = NULL; 409 new->dev = old->dev; 410 new->real_dev = old->real_dev; 411 new->priority = old->priority; 412 new->protocol = old->protocol; 413 new->dst = dst_clone(old->dst); 414 #ifdef CONFIG_INET 415 new->sp = secpath_get(old->sp); 416 #endif 417 new->h.raw = old->h.raw + offset; 418 new->nh.raw = old->nh.raw + offset; 419 new->mac.raw = old->mac.raw + offset; 420 memcpy(new->cb, old->cb, sizeof(old->cb)); 421 new->local_df = old->local_df; 422 new->pkt_type = old->pkt_type; 423 new->stamp = old->stamp; 424 new->destructor = NULL; 425 #ifdef CONFIG_NETFILTER 426 new->nfmark = old->nfmark; 427 new->nfcache = old->nfcache; 428 new->nfct = old->nfct; 429 nf_conntrack_get(old->nfct); 430 new->nfctinfo = old->nfctinfo; 431 #ifdef CONFIG_BRIDGE_NETFILTER 432 new->nf_bridge = old->nf_bridge; 433 nf_bridge_get(old->nf_bridge); 434 #endif 435 #endif 436 #ifdef CONFIG_NET_SCHED 437 #ifdef CONFIG_NET_CLS_ACT 438 new->tc_verd = old->tc_verd; 439 #endif 440 new->tc_index = old->tc_index; 441 #endif 442 atomic_set(&new->users, 1); 443 skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; 444 skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; 445 } 446 447 /** 448 * skb_copy - create private copy of an sk_buff 449 * @skb: buffer to copy 450 * @gfp_mask: allocation priority 451 * 452 * Make a copy of both an &sk_buff and its data. This is used when the 453 * caller wishes to modify the data and needs a private copy of the 454 * data to alter. Returns %NULL on failure or the pointer to the buffer 455 * on success. The returned buffer has a reference count of 1. 456 * 457 * As by-product this function converts non-linear &sk_buff to linear 458 * one, so that &sk_buff becomes completely private and caller is allowed 459 * to modify all the data of returned buffer. This means that this 460 * function is not recommended for use in circumstances when only 461 * header is going to be modified. Use pskb_copy() instead. 462 */ 463 464 struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) 465 { 466 int headerlen = skb->data - skb->head; 467 /* 468 * Allocate the copy buffer 469 */ 470 struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, 471 gfp_mask); 472 if (!n) 473 return NULL; 474 475 /* Set the data pointer */ 476 skb_reserve(n, headerlen); 477 /* Set the tail pointer and length */ 478 skb_put(n, skb->len); 479 n->csum = skb->csum; 480 n->ip_summed = skb->ip_summed; 481 482 if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) 483 BUG(); 484 485 copy_skb_header(n, skb); 486 return n; 487 } 488 489 490 /** 491 * pskb_copy - create copy of an sk_buff with private head. 492 * @skb: buffer to copy 493 * @gfp_mask: allocation priority 494 * 495 * Make a copy of both an &sk_buff and part of its data, located 496 * in header. Fragmented data remain shared. This is used when 497 * the caller wishes to modify only header of &sk_buff and needs 498 * private copy of the header to alter. Returns %NULL on failure 499 * or the pointer to the buffer on success. 500 * The returned buffer has a reference count of 1. 501 */ 502 503 struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) 504 { 505 /* 506 * Allocate the copy buffer 507 */ 508 struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); 509 510 if (!n) 511 goto out; 512 513 /* Set the data pointer */ 514 skb_reserve(n, skb->data - skb->head); 515 /* Set the tail pointer and length */ 516 skb_put(n, skb_headlen(skb)); 517 /* Copy the bytes */ 518 memcpy(n->data, skb->data, n->len); 519 n->csum = skb->csum; 520 n->ip_summed = skb->ip_summed; 521 522 n->data_len = skb->data_len; 523 n->len = skb->len; 524 525 if (skb_shinfo(skb)->nr_frags) { 526 int i; 527 528 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 529 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 530 get_page(skb_shinfo(n)->frags[i].page); 531 } 532 skb_shinfo(n)->nr_frags = i; 533 } 534 535 if (skb_shinfo(skb)->frag_list) { 536 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 537 skb_clone_fraglist(n); 538 } 539 540 copy_skb_header(n, skb); 541 out: 542 return n; 543 } 544 545 /** 546 * pskb_expand_head - reallocate header of &sk_buff 547 * @skb: buffer to reallocate 548 * @nhead: room to add at head 549 * @ntail: room to add at tail 550 * @gfp_mask: allocation priority 551 * 552 * Expands (or creates identical copy, if &nhead and &ntail are zero) 553 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 554 * reference count of 1. Returns zero in the case of success or error, 555 * if expansion failed. In the last case, &sk_buff is not changed. 556 * 557 * All the pointers pointing into skb header may change and must be 558 * reloaded after call to this function. 559 */ 560 561 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, 562 unsigned int __nocast gfp_mask) 563 { 564 int i; 565 u8 *data; 566 int size = nhead + (skb->end - skb->head) + ntail; 567 long off; 568 569 if (skb_shared(skb)) 570 BUG(); 571 572 size = SKB_DATA_ALIGN(size); 573 574 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 575 if (!data) 576 goto nodata; 577 578 /* Copy only real data... and, alas, header. This should be 579 * optimized for the cases when header is void. */ 580 memcpy(data + nhead, skb->head, skb->tail - skb->head); 581 memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); 582 583 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 584 get_page(skb_shinfo(skb)->frags[i].page); 585 586 if (skb_shinfo(skb)->frag_list) 587 skb_clone_fraglist(skb); 588 589 skb_release_data(skb); 590 591 off = (data + nhead) - skb->head; 592 593 skb->head = data; 594 skb->end = data + size; 595 skb->data += off; 596 skb->tail += off; 597 skb->mac.raw += off; 598 skb->h.raw += off; 599 skb->nh.raw += off; 600 skb->cloned = 0; 601 skb->nohdr = 0; 602 atomic_set(&skb_shinfo(skb)->dataref, 1); 603 return 0; 604 605 nodata: 606 return -ENOMEM; 607 } 608 609 /* Make private copy of skb with writable head and some headroom */ 610 611 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 612 { 613 struct sk_buff *skb2; 614 int delta = headroom - skb_headroom(skb); 615 616 if (delta <= 0) 617 skb2 = pskb_copy(skb, GFP_ATOMIC); 618 else { 619 skb2 = skb_clone(skb, GFP_ATOMIC); 620 if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, 621 GFP_ATOMIC)) { 622 kfree_skb(skb2); 623 skb2 = NULL; 624 } 625 } 626 return skb2; 627 } 628 629 630 /** 631 * skb_copy_expand - copy and expand sk_buff 632 * @skb: buffer to copy 633 * @newheadroom: new free bytes at head 634 * @newtailroom: new free bytes at tail 635 * @gfp_mask: allocation priority 636 * 637 * Make a copy of both an &sk_buff and its data and while doing so 638 * allocate additional space. 639 * 640 * This is used when the caller wishes to modify the data and needs a 641 * private copy of the data to alter as well as more space for new fields. 642 * Returns %NULL on failure or the pointer to the buffer 643 * on success. The returned buffer has a reference count of 1. 644 * 645 * You must pass %GFP_ATOMIC as the allocation priority if this function 646 * is called from an interrupt. 647 * 648 * BUG ALERT: ip_summed is not copied. Why does this work? Is it used 649 * only by netfilter in the cases when checksum is recalculated? --ANK 650 */ 651 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 652 int newheadroom, int newtailroom, 653 unsigned int __nocast gfp_mask) 654 { 655 /* 656 * Allocate the copy buffer 657 */ 658 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 659 gfp_mask); 660 int head_copy_len, head_copy_off; 661 662 if (!n) 663 return NULL; 664 665 skb_reserve(n, newheadroom); 666 667 /* Set the tail pointer and length */ 668 skb_put(n, skb->len); 669 670 head_copy_len = skb_headroom(skb); 671 head_copy_off = 0; 672 if (newheadroom <= head_copy_len) 673 head_copy_len = newheadroom; 674 else 675 head_copy_off = newheadroom - head_copy_len; 676 677 /* Copy the linear header and data. */ 678 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, 679 skb->len + head_copy_len)) 680 BUG(); 681 682 copy_skb_header(n, skb); 683 684 return n; 685 } 686 687 /** 688 * skb_pad - zero pad the tail of an skb 689 * @skb: buffer to pad 690 * @pad: space to pad 691 * 692 * Ensure that a buffer is followed by a padding area that is zero 693 * filled. Used by network drivers which may DMA or transfer data 694 * beyond the buffer end onto the wire. 695 * 696 * May return NULL in out of memory cases. 697 */ 698 699 struct sk_buff *skb_pad(struct sk_buff *skb, int pad) 700 { 701 struct sk_buff *nskb; 702 703 /* If the skbuff is non linear tailroom is always zero.. */ 704 if (skb_tailroom(skb) >= pad) { 705 memset(skb->data+skb->len, 0, pad); 706 return skb; 707 } 708 709 nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); 710 kfree_skb(skb); 711 if (nskb) 712 memset(nskb->data+nskb->len, 0, pad); 713 return nskb; 714 } 715 716 /* Trims skb to length len. It can change skb pointers, if "realloc" is 1. 717 * If realloc==0 and trimming is impossible without change of data, 718 * it is BUG(). 719 */ 720 721 int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) 722 { 723 int offset = skb_headlen(skb); 724 int nfrags = skb_shinfo(skb)->nr_frags; 725 int i; 726 727 for (i = 0; i < nfrags; i++) { 728 int end = offset + skb_shinfo(skb)->frags[i].size; 729 if (end > len) { 730 if (skb_cloned(skb)) { 731 if (!realloc) 732 BUG(); 733 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 734 return -ENOMEM; 735 } 736 if (len <= offset) { 737 put_page(skb_shinfo(skb)->frags[i].page); 738 skb_shinfo(skb)->nr_frags--; 739 } else { 740 skb_shinfo(skb)->frags[i].size = len - offset; 741 } 742 } 743 offset = end; 744 } 745 746 if (offset < len) { 747 skb->data_len -= skb->len - len; 748 skb->len = len; 749 } else { 750 if (len <= skb_headlen(skb)) { 751 skb->len = len; 752 skb->data_len = 0; 753 skb->tail = skb->data + len; 754 if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) 755 skb_drop_fraglist(skb); 756 } else { 757 skb->data_len -= skb->len - len; 758 skb->len = len; 759 } 760 } 761 762 return 0; 763 } 764 765 /** 766 * __pskb_pull_tail - advance tail of skb header 767 * @skb: buffer to reallocate 768 * @delta: number of bytes to advance tail 769 * 770 * The function makes a sense only on a fragmented &sk_buff, 771 * it expands header moving its tail forward and copying necessary 772 * data from fragmented part. 773 * 774 * &sk_buff MUST have reference count of 1. 775 * 776 * Returns %NULL (and &sk_buff does not change) if pull failed 777 * or value of new tail of skb in the case of success. 778 * 779 * All the pointers pointing into skb header may change and must be 780 * reloaded after call to this function. 781 */ 782 783 /* Moves tail of skb head forward, copying data from fragmented part, 784 * when it is necessary. 785 * 1. It may fail due to malloc failure. 786 * 2. It may change skb pointers. 787 * 788 * It is pretty complicated. Luckily, it is called only in exceptional cases. 789 */ 790 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) 791 { 792 /* If skb has not enough free space at tail, get new one 793 * plus 128 bytes for future expansions. If we have enough 794 * room at tail, reallocate without expansion only if skb is cloned. 795 */ 796 int i, k, eat = (skb->tail + delta) - skb->end; 797 798 if (eat > 0 || skb_cloned(skb)) { 799 if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, 800 GFP_ATOMIC)) 801 return NULL; 802 } 803 804 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 805 BUG(); 806 807 /* Optimization: no fragments, no reasons to preestimate 808 * size of pulled pages. Superb. 809 */ 810 if (!skb_shinfo(skb)->frag_list) 811 goto pull_pages; 812 813 /* Estimate size of pulled pages. */ 814 eat = delta; 815 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 816 if (skb_shinfo(skb)->frags[i].size >= eat) 817 goto pull_pages; 818 eat -= skb_shinfo(skb)->frags[i].size; 819 } 820 821 /* If we need update frag list, we are in troubles. 822 * Certainly, it possible to add an offset to skb data, 823 * but taking into account that pulling is expected to 824 * be very rare operation, it is worth to fight against 825 * further bloating skb head and crucify ourselves here instead. 826 * Pure masohism, indeed. 8)8) 827 */ 828 if (eat) { 829 struct sk_buff *list = skb_shinfo(skb)->frag_list; 830 struct sk_buff *clone = NULL; 831 struct sk_buff *insp = NULL; 832 833 do { 834 if (!list) 835 BUG(); 836 837 if (list->len <= eat) { 838 /* Eaten as whole. */ 839 eat -= list->len; 840 list = list->next; 841 insp = list; 842 } else { 843 /* Eaten partially. */ 844 845 if (skb_shared(list)) { 846 /* Sucks! We need to fork list. :-( */ 847 clone = skb_clone(list, GFP_ATOMIC); 848 if (!clone) 849 return NULL; 850 insp = list->next; 851 list = clone; 852 } else { 853 /* This may be pulled without 854 * problems. */ 855 insp = list; 856 } 857 if (!pskb_pull(list, eat)) { 858 if (clone) 859 kfree_skb(clone); 860 return NULL; 861 } 862 break; 863 } 864 } while (eat); 865 866 /* Free pulled out fragments. */ 867 while ((list = skb_shinfo(skb)->frag_list) != insp) { 868 skb_shinfo(skb)->frag_list = list->next; 869 kfree_skb(list); 870 } 871 /* And insert new clone at head. */ 872 if (clone) { 873 clone->next = list; 874 skb_shinfo(skb)->frag_list = clone; 875 } 876 } 877 /* Success! Now we may commit changes to skb data. */ 878 879 pull_pages: 880 eat = delta; 881 k = 0; 882 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 883 if (skb_shinfo(skb)->frags[i].size <= eat) { 884 put_page(skb_shinfo(skb)->frags[i].page); 885 eat -= skb_shinfo(skb)->frags[i].size; 886 } else { 887 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 888 if (eat) { 889 skb_shinfo(skb)->frags[k].page_offset += eat; 890 skb_shinfo(skb)->frags[k].size -= eat; 891 eat = 0; 892 } 893 k++; 894 } 895 } 896 skb_shinfo(skb)->nr_frags = k; 897 898 skb->tail += delta; 899 skb->data_len -= delta; 900 901 return skb->tail; 902 } 903 904 /* Copy some data bits from skb to kernel buffer. */ 905 906 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 907 { 908 int i, copy; 909 int start = skb_headlen(skb); 910 911 if (offset > (int)skb->len - len) 912 goto fault; 913 914 /* Copy header. */ 915 if ((copy = start - offset) > 0) { 916 if (copy > len) 917 copy = len; 918 memcpy(to, skb->data + offset, copy); 919 if ((len -= copy) == 0) 920 return 0; 921 offset += copy; 922 to += copy; 923 } 924 925 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 926 int end; 927 928 BUG_TRAP(start <= offset + len); 929 930 end = start + skb_shinfo(skb)->frags[i].size; 931 if ((copy = end - offset) > 0) { 932 u8 *vaddr; 933 934 if (copy > len) 935 copy = len; 936 937 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 938 memcpy(to, 939 vaddr + skb_shinfo(skb)->frags[i].page_offset+ 940 offset - start, copy); 941 kunmap_skb_frag(vaddr); 942 943 if ((len -= copy) == 0) 944 return 0; 945 offset += copy; 946 to += copy; 947 } 948 start = end; 949 } 950 951 if (skb_shinfo(skb)->frag_list) { 952 struct sk_buff *list = skb_shinfo(skb)->frag_list; 953 954 for (; list; list = list->next) { 955 int end; 956 957 BUG_TRAP(start <= offset + len); 958 959 end = start + list->len; 960 if ((copy = end - offset) > 0) { 961 if (copy > len) 962 copy = len; 963 if (skb_copy_bits(list, offset - start, 964 to, copy)) 965 goto fault; 966 if ((len -= copy) == 0) 967 return 0; 968 offset += copy; 969 to += copy; 970 } 971 start = end; 972 } 973 } 974 if (!len) 975 return 0; 976 977 fault: 978 return -EFAULT; 979 } 980 981 /** 982 * skb_store_bits - store bits from kernel buffer to skb 983 * @skb: destination buffer 984 * @offset: offset in destination 985 * @from: source buffer 986 * @len: number of bytes to copy 987 * 988 * Copy the specified number of bytes from the source buffer to the 989 * destination skb. This function handles all the messy bits of 990 * traversing fragment lists and such. 991 */ 992 993 int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len) 994 { 995 int i, copy; 996 int start = skb_headlen(skb); 997 998 if (offset > (int)skb->len - len) 999 goto fault; 1000 1001 if ((copy = start - offset) > 0) { 1002 if (copy > len) 1003 copy = len; 1004 memcpy(skb->data + offset, from, copy); 1005 if ((len -= copy) == 0) 1006 return 0; 1007 offset += copy; 1008 from += copy; 1009 } 1010 1011 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1012 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1013 int end; 1014 1015 BUG_TRAP(start <= offset + len); 1016 1017 end = start + frag->size; 1018 if ((copy = end - offset) > 0) { 1019 u8 *vaddr; 1020 1021 if (copy > len) 1022 copy = len; 1023 1024 vaddr = kmap_skb_frag(frag); 1025 memcpy(vaddr + frag->page_offset + offset - start, 1026 from, copy); 1027 kunmap_skb_frag(vaddr); 1028 1029 if ((len -= copy) == 0) 1030 return 0; 1031 offset += copy; 1032 from += copy; 1033 } 1034 start = end; 1035 } 1036 1037 if (skb_shinfo(skb)->frag_list) { 1038 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1039 1040 for (; list; list = list->next) { 1041 int end; 1042 1043 BUG_TRAP(start <= offset + len); 1044 1045 end = start + list->len; 1046 if ((copy = end - offset) > 0) { 1047 if (copy > len) 1048 copy = len; 1049 if (skb_store_bits(list, offset - start, 1050 from, copy)) 1051 goto fault; 1052 if ((len -= copy) == 0) 1053 return 0; 1054 offset += copy; 1055 from += copy; 1056 } 1057 start = end; 1058 } 1059 } 1060 if (!len) 1061 return 0; 1062 1063 fault: 1064 return -EFAULT; 1065 } 1066 1067 EXPORT_SYMBOL(skb_store_bits); 1068 1069 /* Checksum skb data. */ 1070 1071 unsigned int skb_checksum(const struct sk_buff *skb, int offset, 1072 int len, unsigned int csum) 1073 { 1074 int start = skb_headlen(skb); 1075 int i, copy = start - offset; 1076 int pos = 0; 1077 1078 /* Checksum header. */ 1079 if (copy > 0) { 1080 if (copy > len) 1081 copy = len; 1082 csum = csum_partial(skb->data + offset, copy, csum); 1083 if ((len -= copy) == 0) 1084 return csum; 1085 offset += copy; 1086 pos = copy; 1087 } 1088 1089 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1090 int end; 1091 1092 BUG_TRAP(start <= offset + len); 1093 1094 end = start + skb_shinfo(skb)->frags[i].size; 1095 if ((copy = end - offset) > 0) { 1096 unsigned int csum2; 1097 u8 *vaddr; 1098 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1099 1100 if (copy > len) 1101 copy = len; 1102 vaddr = kmap_skb_frag(frag); 1103 csum2 = csum_partial(vaddr + frag->page_offset + 1104 offset - start, copy, 0); 1105 kunmap_skb_frag(vaddr); 1106 csum = csum_block_add(csum, csum2, pos); 1107 if (!(len -= copy)) 1108 return csum; 1109 offset += copy; 1110 pos += copy; 1111 } 1112 start = end; 1113 } 1114 1115 if (skb_shinfo(skb)->frag_list) { 1116 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1117 1118 for (; list; list = list->next) { 1119 int end; 1120 1121 BUG_TRAP(start <= offset + len); 1122 1123 end = start + list->len; 1124 if ((copy = end - offset) > 0) { 1125 unsigned int csum2; 1126 if (copy > len) 1127 copy = len; 1128 csum2 = skb_checksum(list, offset - start, 1129 copy, 0); 1130 csum = csum_block_add(csum, csum2, pos); 1131 if ((len -= copy) == 0) 1132 return csum; 1133 offset += copy; 1134 pos += copy; 1135 } 1136 start = end; 1137 } 1138 } 1139 if (len) 1140 BUG(); 1141 1142 return csum; 1143 } 1144 1145 /* Both of above in one bottle. */ 1146 1147 unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, 1148 u8 *to, int len, unsigned int csum) 1149 { 1150 int start = skb_headlen(skb); 1151 int i, copy = start - offset; 1152 int pos = 0; 1153 1154 /* Copy header. */ 1155 if (copy > 0) { 1156 if (copy > len) 1157 copy = len; 1158 csum = csum_partial_copy_nocheck(skb->data + offset, to, 1159 copy, csum); 1160 if ((len -= copy) == 0) 1161 return csum; 1162 offset += copy; 1163 to += copy; 1164 pos = copy; 1165 } 1166 1167 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1168 int end; 1169 1170 BUG_TRAP(start <= offset + len); 1171 1172 end = start + skb_shinfo(skb)->frags[i].size; 1173 if ((copy = end - offset) > 0) { 1174 unsigned int csum2; 1175 u8 *vaddr; 1176 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1177 1178 if (copy > len) 1179 copy = len; 1180 vaddr = kmap_skb_frag(frag); 1181 csum2 = csum_partial_copy_nocheck(vaddr + 1182 frag->page_offset + 1183 offset - start, to, 1184 copy, 0); 1185 kunmap_skb_frag(vaddr); 1186 csum = csum_block_add(csum, csum2, pos); 1187 if (!(len -= copy)) 1188 return csum; 1189 offset += copy; 1190 to += copy; 1191 pos += copy; 1192 } 1193 start = end; 1194 } 1195 1196 if (skb_shinfo(skb)->frag_list) { 1197 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1198 1199 for (; list; list = list->next) { 1200 unsigned int csum2; 1201 int end; 1202 1203 BUG_TRAP(start <= offset + len); 1204 1205 end = start + list->len; 1206 if ((copy = end - offset) > 0) { 1207 if (copy > len) 1208 copy = len; 1209 csum2 = skb_copy_and_csum_bits(list, 1210 offset - start, 1211 to, copy, 0); 1212 csum = csum_block_add(csum, csum2, pos); 1213 if ((len -= copy) == 0) 1214 return csum; 1215 offset += copy; 1216 to += copy; 1217 pos += copy; 1218 } 1219 start = end; 1220 } 1221 } 1222 if (len) 1223 BUG(); 1224 return csum; 1225 } 1226 1227 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 1228 { 1229 unsigned int csum; 1230 long csstart; 1231 1232 if (skb->ip_summed == CHECKSUM_HW) 1233 csstart = skb->h.raw - skb->data; 1234 else 1235 csstart = skb_headlen(skb); 1236 1237 if (csstart > skb_headlen(skb)) 1238 BUG(); 1239 1240 memcpy(to, skb->data, csstart); 1241 1242 csum = 0; 1243 if (csstart != skb->len) 1244 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, 1245 skb->len - csstart, 0); 1246 1247 if (skb->ip_summed == CHECKSUM_HW) { 1248 long csstuff = csstart + skb->csum; 1249 1250 *((unsigned short *)(to + csstuff)) = csum_fold(csum); 1251 } 1252 } 1253 1254 /** 1255 * skb_dequeue - remove from the head of the queue 1256 * @list: list to dequeue from 1257 * 1258 * Remove the head of the list. The list lock is taken so the function 1259 * may be used safely with other locking list functions. The head item is 1260 * returned or %NULL if the list is empty. 1261 */ 1262 1263 struct sk_buff *skb_dequeue(struct sk_buff_head *list) 1264 { 1265 unsigned long flags; 1266 struct sk_buff *result; 1267 1268 spin_lock_irqsave(&list->lock, flags); 1269 result = __skb_dequeue(list); 1270 spin_unlock_irqrestore(&list->lock, flags); 1271 return result; 1272 } 1273 1274 /** 1275 * skb_dequeue_tail - remove from the tail of the queue 1276 * @list: list to dequeue from 1277 * 1278 * Remove the tail of the list. The list lock is taken so the function 1279 * may be used safely with other locking list functions. The tail item is 1280 * returned or %NULL if the list is empty. 1281 */ 1282 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 1283 { 1284 unsigned long flags; 1285 struct sk_buff *result; 1286 1287 spin_lock_irqsave(&list->lock, flags); 1288 result = __skb_dequeue_tail(list); 1289 spin_unlock_irqrestore(&list->lock, flags); 1290 return result; 1291 } 1292 1293 /** 1294 * skb_queue_purge - empty a list 1295 * @list: list to empty 1296 * 1297 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1298 * the list and one reference dropped. This function takes the list 1299 * lock and is atomic with respect to other list locking functions. 1300 */ 1301 void skb_queue_purge(struct sk_buff_head *list) 1302 { 1303 struct sk_buff *skb; 1304 while ((skb = skb_dequeue(list)) != NULL) 1305 kfree_skb(skb); 1306 } 1307 1308 /** 1309 * skb_queue_head - queue a buffer at the list head 1310 * @list: list to use 1311 * @newsk: buffer to queue 1312 * 1313 * Queue a buffer at the start of the list. This function takes the 1314 * list lock and can be used safely with other locking &sk_buff functions 1315 * safely. 1316 * 1317 * A buffer cannot be placed on two lists at the same time. 1318 */ 1319 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 1320 { 1321 unsigned long flags; 1322 1323 spin_lock_irqsave(&list->lock, flags); 1324 __skb_queue_head(list, newsk); 1325 spin_unlock_irqrestore(&list->lock, flags); 1326 } 1327 1328 /** 1329 * skb_queue_tail - queue a buffer at the list tail 1330 * @list: list to use 1331 * @newsk: buffer to queue 1332 * 1333 * Queue a buffer at the tail of the list. This function takes the 1334 * list lock and can be used safely with other locking &sk_buff functions 1335 * safely. 1336 * 1337 * A buffer cannot be placed on two lists at the same time. 1338 */ 1339 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 1340 { 1341 unsigned long flags; 1342 1343 spin_lock_irqsave(&list->lock, flags); 1344 __skb_queue_tail(list, newsk); 1345 spin_unlock_irqrestore(&list->lock, flags); 1346 } 1347 /** 1348 * skb_unlink - remove a buffer from a list 1349 * @skb: buffer to remove 1350 * 1351 * Place a packet after a given packet in a list. The list locks are taken 1352 * and this function is atomic with respect to other list locked calls 1353 * 1354 * Works even without knowing the list it is sitting on, which can be 1355 * handy at times. It also means that THE LIST MUST EXIST when you 1356 * unlink. Thus a list must have its contents unlinked before it is 1357 * destroyed. 1358 */ 1359 void skb_unlink(struct sk_buff *skb) 1360 { 1361 struct sk_buff_head *list = skb->list; 1362 1363 if (list) { 1364 unsigned long flags; 1365 1366 spin_lock_irqsave(&list->lock, flags); 1367 if (skb->list == list) 1368 __skb_unlink(skb, skb->list); 1369 spin_unlock_irqrestore(&list->lock, flags); 1370 } 1371 } 1372 1373 1374 /** 1375 * skb_append - append a buffer 1376 * @old: buffer to insert after 1377 * @newsk: buffer to insert 1378 * 1379 * Place a packet after a given packet in a list. The list locks are taken 1380 * and this function is atomic with respect to other list locked calls. 1381 * A buffer cannot be placed on two lists at the same time. 1382 */ 1383 1384 void skb_append(struct sk_buff *old, struct sk_buff *newsk) 1385 { 1386 unsigned long flags; 1387 1388 spin_lock_irqsave(&old->list->lock, flags); 1389 __skb_append(old, newsk); 1390 spin_unlock_irqrestore(&old->list->lock, flags); 1391 } 1392 1393 1394 /** 1395 * skb_insert - insert a buffer 1396 * @old: buffer to insert before 1397 * @newsk: buffer to insert 1398 * 1399 * Place a packet before a given packet in a list. The list locks are taken 1400 * and this function is atomic with respect to other list locked calls 1401 * A buffer cannot be placed on two lists at the same time. 1402 */ 1403 1404 void skb_insert(struct sk_buff *old, struct sk_buff *newsk) 1405 { 1406 unsigned long flags; 1407 1408 spin_lock_irqsave(&old->list->lock, flags); 1409 __skb_insert(newsk, old->prev, old, old->list); 1410 spin_unlock_irqrestore(&old->list->lock, flags); 1411 } 1412 1413 #if 0 1414 /* 1415 * Tune the memory allocator for a new MTU size. 1416 */ 1417 void skb_add_mtu(int mtu) 1418 { 1419 /* Must match allocation in alloc_skb */ 1420 mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); 1421 1422 kmem_add_cache_size(mtu); 1423 } 1424 #endif 1425 1426 static inline void skb_split_inside_header(struct sk_buff *skb, 1427 struct sk_buff* skb1, 1428 const u32 len, const int pos) 1429 { 1430 int i; 1431 1432 memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len); 1433 1434 /* And move data appendix as is. */ 1435 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1436 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; 1437 1438 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; 1439 skb_shinfo(skb)->nr_frags = 0; 1440 skb1->data_len = skb->data_len; 1441 skb1->len += skb1->data_len; 1442 skb->data_len = 0; 1443 skb->len = len; 1444 skb->tail = skb->data + len; 1445 } 1446 1447 static inline void skb_split_no_header(struct sk_buff *skb, 1448 struct sk_buff* skb1, 1449 const u32 len, int pos) 1450 { 1451 int i, k = 0; 1452 const int nfrags = skb_shinfo(skb)->nr_frags; 1453 1454 skb_shinfo(skb)->nr_frags = 0; 1455 skb1->len = skb1->data_len = skb->len - len; 1456 skb->len = len; 1457 skb->data_len = len - pos; 1458 1459 for (i = 0; i < nfrags; i++) { 1460 int size = skb_shinfo(skb)->frags[i].size; 1461 1462 if (pos + size > len) { 1463 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; 1464 1465 if (pos < len) { 1466 /* Split frag. 1467 * We have two variants in this case: 1468 * 1. Move all the frag to the second 1469 * part, if it is possible. F.e. 1470 * this approach is mandatory for TUX, 1471 * where splitting is expensive. 1472 * 2. Split is accurately. We make this. 1473 */ 1474 get_page(skb_shinfo(skb)->frags[i].page); 1475 skb_shinfo(skb1)->frags[0].page_offset += len - pos; 1476 skb_shinfo(skb1)->frags[0].size -= len - pos; 1477 skb_shinfo(skb)->frags[i].size = len - pos; 1478 skb_shinfo(skb)->nr_frags++; 1479 } 1480 k++; 1481 } else 1482 skb_shinfo(skb)->nr_frags++; 1483 pos += size; 1484 } 1485 skb_shinfo(skb1)->nr_frags = k; 1486 } 1487 1488 /** 1489 * skb_split - Split fragmented skb to two parts at length len. 1490 * @skb: the buffer to split 1491 * @skb1: the buffer to receive the second part 1492 * @len: new length for skb 1493 */ 1494 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) 1495 { 1496 int pos = skb_headlen(skb); 1497 1498 if (len < pos) /* Split line is inside header. */ 1499 skb_split_inside_header(skb, skb1, len, pos); 1500 else /* Second chunk has no header, nothing to copy. */ 1501 skb_split_no_header(skb, skb1, len, pos); 1502 } 1503 1504 /** 1505 * skb_prepare_seq_read - Prepare a sequential read of skb data 1506 * @skb: the buffer to read 1507 * @from: lower offset of data to be read 1508 * @to: upper offset of data to be read 1509 * @st: state variable 1510 * 1511 * Initializes the specified state variable. Must be called before 1512 * invoking skb_seq_read() for the first time. 1513 */ 1514 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, 1515 unsigned int to, struct skb_seq_state *st) 1516 { 1517 st->lower_offset = from; 1518 st->upper_offset = to; 1519 st->root_skb = st->cur_skb = skb; 1520 st->frag_idx = st->stepped_offset = 0; 1521 st->frag_data = NULL; 1522 } 1523 1524 /** 1525 * skb_seq_read - Sequentially read skb data 1526 * @consumed: number of bytes consumed by the caller so far 1527 * @data: destination pointer for data to be returned 1528 * @st: state variable 1529 * 1530 * Reads a block of skb data at &consumed relative to the 1531 * lower offset specified to skb_prepare_seq_read(). Assigns 1532 * the head of the data block to &data and returns the length 1533 * of the block or 0 if the end of the skb data or the upper 1534 * offset has been reached. 1535 * 1536 * The caller is not required to consume all of the data 1537 * returned, i.e. &consumed is typically set to the number 1538 * of bytes already consumed and the next call to 1539 * skb_seq_read() will return the remaining part of the block. 1540 * 1541 * Note: The size of each block of data returned can be arbitary, 1542 * this limitation is the cost for zerocopy seqeuental 1543 * reads of potentially non linear data. 1544 * 1545 * Note: Fragment lists within fragments are not implemented 1546 * at the moment, state->root_skb could be replaced with 1547 * a stack for this purpose. 1548 */ 1549 unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 1550 struct skb_seq_state *st) 1551 { 1552 unsigned int block_limit, abs_offset = consumed + st->lower_offset; 1553 skb_frag_t *frag; 1554 1555 if (unlikely(abs_offset >= st->upper_offset)) 1556 return 0; 1557 1558 next_skb: 1559 block_limit = skb_headlen(st->cur_skb); 1560 1561 if (abs_offset < block_limit) { 1562 *data = st->cur_skb->data + abs_offset; 1563 return block_limit - abs_offset; 1564 } 1565 1566 if (st->frag_idx == 0 && !st->frag_data) 1567 st->stepped_offset += skb_headlen(st->cur_skb); 1568 1569 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { 1570 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 1571 block_limit = frag->size + st->stepped_offset; 1572 1573 if (abs_offset < block_limit) { 1574 if (!st->frag_data) 1575 st->frag_data = kmap_skb_frag(frag); 1576 1577 *data = (u8 *) st->frag_data + frag->page_offset + 1578 (abs_offset - st->stepped_offset); 1579 1580 return block_limit - abs_offset; 1581 } 1582 1583 if (st->frag_data) { 1584 kunmap_skb_frag(st->frag_data); 1585 st->frag_data = NULL; 1586 } 1587 1588 st->frag_idx++; 1589 st->stepped_offset += frag->size; 1590 } 1591 1592 if (st->cur_skb->next) { 1593 st->cur_skb = st->cur_skb->next; 1594 st->frag_idx = 0; 1595 goto next_skb; 1596 } else if (st->root_skb == st->cur_skb && 1597 skb_shinfo(st->root_skb)->frag_list) { 1598 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 1599 goto next_skb; 1600 } 1601 1602 return 0; 1603 } 1604 1605 /** 1606 * skb_abort_seq_read - Abort a sequential read of skb data 1607 * @st: state variable 1608 * 1609 * Must be called if skb_seq_read() was not called until it 1610 * returned 0. 1611 */ 1612 void skb_abort_seq_read(struct skb_seq_state *st) 1613 { 1614 if (st->frag_data) 1615 kunmap_skb_frag(st->frag_data); 1616 } 1617 1618 #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) 1619 1620 static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, 1621 struct ts_config *conf, 1622 struct ts_state *state) 1623 { 1624 return skb_seq_read(offset, text, TS_SKB_CB(state)); 1625 } 1626 1627 static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) 1628 { 1629 skb_abort_seq_read(TS_SKB_CB(state)); 1630 } 1631 1632 /** 1633 * skb_find_text - Find a text pattern in skb data 1634 * @skb: the buffer to look in 1635 * @from: search offset 1636 * @to: search limit 1637 * @config: textsearch configuration 1638 * @state: uninitialized textsearch state variable 1639 * 1640 * Finds a pattern in the skb data according to the specified 1641 * textsearch configuration. Use textsearch_next() to retrieve 1642 * subsequent occurrences of the pattern. Returns the offset 1643 * to the first occurrence or UINT_MAX if no match was found. 1644 */ 1645 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, 1646 unsigned int to, struct ts_config *config, 1647 struct ts_state *state) 1648 { 1649 config->get_next_block = skb_ts_get_next_block; 1650 config->finish = skb_ts_finish; 1651 1652 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); 1653 1654 return textsearch_find(config, state); 1655 } 1656 1657 void __init skb_init(void) 1658 { 1659 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 1660 sizeof(struct sk_buff), 1661 0, 1662 SLAB_HWCACHE_ALIGN, 1663 NULL, NULL); 1664 if (!skbuff_head_cache) 1665 panic("cannot create skbuff cache"); 1666 } 1667 1668 EXPORT_SYMBOL(___pskb_trim); 1669 EXPORT_SYMBOL(__kfree_skb); 1670 EXPORT_SYMBOL(__pskb_pull_tail); 1671 EXPORT_SYMBOL(alloc_skb); 1672 EXPORT_SYMBOL(pskb_copy); 1673 EXPORT_SYMBOL(pskb_expand_head); 1674 EXPORT_SYMBOL(skb_checksum); 1675 EXPORT_SYMBOL(skb_clone); 1676 EXPORT_SYMBOL(skb_clone_fraglist); 1677 EXPORT_SYMBOL(skb_copy); 1678 EXPORT_SYMBOL(skb_copy_and_csum_bits); 1679 EXPORT_SYMBOL(skb_copy_and_csum_dev); 1680 EXPORT_SYMBOL(skb_copy_bits); 1681 EXPORT_SYMBOL(skb_copy_expand); 1682 EXPORT_SYMBOL(skb_over_panic); 1683 EXPORT_SYMBOL(skb_pad); 1684 EXPORT_SYMBOL(skb_realloc_headroom); 1685 EXPORT_SYMBOL(skb_under_panic); 1686 EXPORT_SYMBOL(skb_dequeue); 1687 EXPORT_SYMBOL(skb_dequeue_tail); 1688 EXPORT_SYMBOL(skb_insert); 1689 EXPORT_SYMBOL(skb_queue_purge); 1690 EXPORT_SYMBOL(skb_queue_head); 1691 EXPORT_SYMBOL(skb_queue_tail); 1692 EXPORT_SYMBOL(skb_unlink); 1693 EXPORT_SYMBOL(skb_append); 1694 EXPORT_SYMBOL(skb_split); 1695 EXPORT_SYMBOL(skb_prepare_seq_read); 1696 EXPORT_SYMBOL(skb_seq_read); 1697 EXPORT_SYMBOL(skb_abort_seq_read); 1698 EXPORT_SYMBOL(skb_find_text); 1699