1 /* 2 * Routines having to do with the 'struct sk_buff' memory handlers. 3 * 4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 5 * Florian La Roche <rzsfl@rz.uni-sb.de> 6 * 7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ 8 * 9 * Fixes: 10 * Alan Cox : Fixed the worst of the load 11 * balancer bugs. 12 * Dave Platt : Interrupt stacking fix. 13 * Richard Kooijman : Timestamp fixes. 14 * Alan Cox : Changed buffer format. 15 * Alan Cox : destructor hook for AF_UNIX etc. 16 * Linus Torvalds : Better skb_clone. 17 * Alan Cox : Added skb_copy. 18 * Alan Cox : Added all the changed routines Linus 19 * only put in the headers 20 * Ray VanTassle : Fixed --skb->lock in free 21 * Alan Cox : skb_copy copy arp field 22 * Andi Kleen : slabified it. 23 * Robert Olsson : Removed skb_head_pool 24 * 25 * NOTE: 26 * The __skb_ routines should be called with interrupts 27 * disabled, or you better be *real* sure that the operation is atomic 28 * with respect to whatever list is being frobbed (e.g. via lock_sock() 29 * or via disabling bottom half handlers, etc). 30 * 31 * This program is free software; you can redistribute it and/or 32 * modify it under the terms of the GNU General Public License 33 * as published by the Free Software Foundation; either version 34 * 2 of the License, or (at your option) any later version. 35 */ 36 37 /* 38 * The functions in this file will not compile correctly with gcc 2.4.x 39 */ 40 41 #include <linux/config.h> 42 #include <linux/module.h> 43 #include <linux/types.h> 44 #include <linux/kernel.h> 45 #include <linux/sched.h> 46 #include <linux/mm.h> 47 #include <linux/interrupt.h> 48 #include <linux/in.h> 49 #include <linux/inet.h> 50 #include <linux/slab.h> 51 #include <linux/netdevice.h> 52 #ifdef CONFIG_NET_CLS_ACT 53 #include <net/pkt_sched.h> 54 #endif 55 #include <linux/string.h> 56 #include <linux/skbuff.h> 57 #include <linux/cache.h> 58 #include <linux/rtnetlink.h> 59 #include <linux/init.h> 60 #include <linux/highmem.h> 61 62 #include <net/protocol.h> 63 #include <net/dst.h> 64 #include <net/sock.h> 65 #include <net/checksum.h> 66 #include <net/xfrm.h> 67 68 #include <asm/uaccess.h> 69 #include <asm/system.h> 70 71 static kmem_cache_t *skbuff_head_cache; 72 73 /* 74 * Keep out-of-line to prevent kernel bloat. 75 * __builtin_return_address is not used because it is not always 76 * reliable. 77 */ 78 79 /** 80 * skb_over_panic - private function 81 * @skb: buffer 82 * @sz: size 83 * @here: address 84 * 85 * Out of line support code for skb_put(). Not user callable. 86 */ 87 void skb_over_panic(struct sk_buff *skb, int sz, void *here) 88 { 89 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 90 "data:%p tail:%p end:%p dev:%s\n", 91 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 92 skb->dev ? skb->dev->name : "<NULL>"); 93 BUG(); 94 } 95 96 /** 97 * skb_under_panic - private function 98 * @skb: buffer 99 * @sz: size 100 * @here: address 101 * 102 * Out of line support code for skb_push(). Not user callable. 103 */ 104 105 void skb_under_panic(struct sk_buff *skb, int sz, void *here) 106 { 107 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 108 "data:%p tail:%p end:%p dev:%s\n", 109 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 110 skb->dev ? skb->dev->name : "<NULL>"); 111 BUG(); 112 } 113 114 /* Allocate a new skbuff. We do this ourselves so we can fill in a few 115 * 'private' fields and also do memory statistics to find all the 116 * [BEEP] leaks. 117 * 118 */ 119 120 /** 121 * alloc_skb - allocate a network buffer 122 * @size: size to allocate 123 * @gfp_mask: allocation mask 124 * 125 * Allocate a new &sk_buff. The returned buffer has no headroom and a 126 * tail room of size bytes. The object has a reference count of one. 127 * The return is the buffer. On a failure the return is %NULL. 128 * 129 * Buffers may only be allocated from interrupts using a @gfp_mask of 130 * %GFP_ATOMIC. 131 */ 132 struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) 133 { 134 struct sk_buff *skb; 135 u8 *data; 136 137 /* Get the HEAD */ 138 skb = kmem_cache_alloc(skbuff_head_cache, 139 gfp_mask & ~__GFP_DMA); 140 if (!skb) 141 goto out; 142 143 /* Get the DATA. Size must match skb_add_mtu(). */ 144 size = SKB_DATA_ALIGN(size); 145 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 146 if (!data) 147 goto nodata; 148 149 memset(skb, 0, offsetof(struct sk_buff, truesize)); 150 skb->truesize = size + sizeof(struct sk_buff); 151 atomic_set(&skb->users, 1); 152 skb->head = data; 153 skb->data = data; 154 skb->tail = data; 155 skb->end = data + size; 156 157 atomic_set(&(skb_shinfo(skb)->dataref), 1); 158 skb_shinfo(skb)->nr_frags = 0; 159 skb_shinfo(skb)->tso_size = 0; 160 skb_shinfo(skb)->tso_segs = 0; 161 skb_shinfo(skb)->frag_list = NULL; 162 out: 163 return skb; 164 nodata: 165 kmem_cache_free(skbuff_head_cache, skb); 166 skb = NULL; 167 goto out; 168 } 169 170 /** 171 * alloc_skb_from_cache - allocate a network buffer 172 * @cp: kmem_cache from which to allocate the data area 173 * (object size must be big enough for @size bytes + skb overheads) 174 * @size: size to allocate 175 * @gfp_mask: allocation mask 176 * 177 * Allocate a new &sk_buff. The returned buffer has no headroom and 178 * tail room of size bytes. The object has a reference count of one. 179 * The return is the buffer. On a failure the return is %NULL. 180 * 181 * Buffers may only be allocated from interrupts using a @gfp_mask of 182 * %GFP_ATOMIC. 183 */ 184 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 185 unsigned int size, int gfp_mask) 186 { 187 struct sk_buff *skb; 188 u8 *data; 189 190 /* Get the HEAD */ 191 skb = kmem_cache_alloc(skbuff_head_cache, 192 gfp_mask & ~__GFP_DMA); 193 if (!skb) 194 goto out; 195 196 /* Get the DATA. */ 197 size = SKB_DATA_ALIGN(size); 198 data = kmem_cache_alloc(cp, gfp_mask); 199 if (!data) 200 goto nodata; 201 202 memset(skb, 0, offsetof(struct sk_buff, truesize)); 203 skb->truesize = size + sizeof(struct sk_buff); 204 atomic_set(&skb->users, 1); 205 skb->head = data; 206 skb->data = data; 207 skb->tail = data; 208 skb->end = data + size; 209 210 atomic_set(&(skb_shinfo(skb)->dataref), 1); 211 skb_shinfo(skb)->nr_frags = 0; 212 skb_shinfo(skb)->tso_size = 0; 213 skb_shinfo(skb)->tso_segs = 0; 214 skb_shinfo(skb)->frag_list = NULL; 215 out: 216 return skb; 217 nodata: 218 kmem_cache_free(skbuff_head_cache, skb); 219 skb = NULL; 220 goto out; 221 } 222 223 224 static void skb_drop_fraglist(struct sk_buff *skb) 225 { 226 struct sk_buff *list = skb_shinfo(skb)->frag_list; 227 228 skb_shinfo(skb)->frag_list = NULL; 229 230 do { 231 struct sk_buff *this = list; 232 list = list->next; 233 kfree_skb(this); 234 } while (list); 235 } 236 237 static void skb_clone_fraglist(struct sk_buff *skb) 238 { 239 struct sk_buff *list; 240 241 for (list = skb_shinfo(skb)->frag_list; list; list = list->next) 242 skb_get(list); 243 } 244 245 void skb_release_data(struct sk_buff *skb) 246 { 247 if (!skb->cloned || 248 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 249 &skb_shinfo(skb)->dataref)) { 250 if (skb_shinfo(skb)->nr_frags) { 251 int i; 252 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 253 put_page(skb_shinfo(skb)->frags[i].page); 254 } 255 256 if (skb_shinfo(skb)->frag_list) 257 skb_drop_fraglist(skb); 258 259 kfree(skb->head); 260 } 261 } 262 263 /* 264 * Free an skbuff by memory without cleaning the state. 265 */ 266 void kfree_skbmem(struct sk_buff *skb) 267 { 268 skb_release_data(skb); 269 kmem_cache_free(skbuff_head_cache, skb); 270 } 271 272 /** 273 * __kfree_skb - private function 274 * @skb: buffer 275 * 276 * Free an sk_buff. Release anything attached to the buffer. 277 * Clean the state. This is an internal helper function. Users should 278 * always call kfree_skb 279 */ 280 281 void __kfree_skb(struct sk_buff *skb) 282 { 283 BUG_ON(skb->list != NULL); 284 285 dst_release(skb->dst); 286 #ifdef CONFIG_XFRM 287 secpath_put(skb->sp); 288 #endif 289 if (skb->destructor) { 290 WARN_ON(in_irq()); 291 skb->destructor(skb); 292 } 293 #ifdef CONFIG_NETFILTER 294 nf_conntrack_put(skb->nfct); 295 #ifdef CONFIG_BRIDGE_NETFILTER 296 nf_bridge_put(skb->nf_bridge); 297 #endif 298 #endif 299 /* XXX: IS this still necessary? - JHS */ 300 #ifdef CONFIG_NET_SCHED 301 skb->tc_index = 0; 302 #ifdef CONFIG_NET_CLS_ACT 303 skb->tc_verd = 0; 304 skb->tc_classid = 0; 305 #endif 306 #endif 307 308 kfree_skbmem(skb); 309 } 310 311 /** 312 * skb_clone - duplicate an sk_buff 313 * @skb: buffer to clone 314 * @gfp_mask: allocation priority 315 * 316 * Duplicate an &sk_buff. The new one is not owned by a socket. Both 317 * copies share the same packet data but not structure. The new 318 * buffer has a reference count of 1. If the allocation fails the 319 * function returns %NULL otherwise the new buffer is returned. 320 * 321 * If this function is called from an interrupt gfp_mask() must be 322 * %GFP_ATOMIC. 323 */ 324 325 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) 326 { 327 struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 328 329 if (!n) 330 return NULL; 331 332 #define C(x) n->x = skb->x 333 334 n->next = n->prev = NULL; 335 n->list = NULL; 336 n->sk = NULL; 337 C(stamp); 338 C(dev); 339 C(real_dev); 340 C(h); 341 C(nh); 342 C(mac); 343 C(dst); 344 dst_clone(skb->dst); 345 C(sp); 346 #ifdef CONFIG_INET 347 secpath_get(skb->sp); 348 #endif 349 memcpy(n->cb, skb->cb, sizeof(skb->cb)); 350 C(len); 351 C(data_len); 352 C(csum); 353 C(local_df); 354 n->cloned = 1; 355 n->nohdr = 0; 356 C(pkt_type); 357 C(ip_summed); 358 C(priority); 359 C(protocol); 360 n->destructor = NULL; 361 #ifdef CONFIG_NETFILTER 362 C(nfmark); 363 C(nfcache); 364 C(nfct); 365 nf_conntrack_get(skb->nfct); 366 C(nfctinfo); 367 #ifdef CONFIG_BRIDGE_NETFILTER 368 C(nf_bridge); 369 nf_bridge_get(skb->nf_bridge); 370 #endif 371 #endif /*CONFIG_NETFILTER*/ 372 #if defined(CONFIG_HIPPI) 373 C(private); 374 #endif 375 #ifdef CONFIG_NET_SCHED 376 C(tc_index); 377 #ifdef CONFIG_NET_CLS_ACT 378 n->tc_verd = SET_TC_VERD(skb->tc_verd,0); 379 n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd); 380 n->tc_verd = CLR_TC_MUNGED(skb->tc_verd); 381 C(input_dev); 382 C(tc_classid); 383 #endif 384 385 #endif 386 C(truesize); 387 atomic_set(&n->users, 1); 388 C(head); 389 C(data); 390 C(tail); 391 C(end); 392 393 atomic_inc(&(skb_shinfo(skb)->dataref)); 394 skb->cloned = 1; 395 396 return n; 397 } 398 399 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 400 { 401 /* 402 * Shift between the two data areas in bytes 403 */ 404 unsigned long offset = new->data - old->data; 405 406 new->list = NULL; 407 new->sk = NULL; 408 new->dev = old->dev; 409 new->real_dev = old->real_dev; 410 new->priority = old->priority; 411 new->protocol = old->protocol; 412 new->dst = dst_clone(old->dst); 413 #ifdef CONFIG_INET 414 new->sp = secpath_get(old->sp); 415 #endif 416 new->h.raw = old->h.raw + offset; 417 new->nh.raw = old->nh.raw + offset; 418 new->mac.raw = old->mac.raw + offset; 419 memcpy(new->cb, old->cb, sizeof(old->cb)); 420 new->local_df = old->local_df; 421 new->pkt_type = old->pkt_type; 422 new->stamp = old->stamp; 423 new->destructor = NULL; 424 #ifdef CONFIG_NETFILTER 425 new->nfmark = old->nfmark; 426 new->nfcache = old->nfcache; 427 new->nfct = old->nfct; 428 nf_conntrack_get(old->nfct); 429 new->nfctinfo = old->nfctinfo; 430 #ifdef CONFIG_BRIDGE_NETFILTER 431 new->nf_bridge = old->nf_bridge; 432 nf_bridge_get(old->nf_bridge); 433 #endif 434 #endif 435 #ifdef CONFIG_NET_SCHED 436 #ifdef CONFIG_NET_CLS_ACT 437 new->tc_verd = old->tc_verd; 438 #endif 439 new->tc_index = old->tc_index; 440 #endif 441 atomic_set(&new->users, 1); 442 skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; 443 skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; 444 } 445 446 /** 447 * skb_copy - create private copy of an sk_buff 448 * @skb: buffer to copy 449 * @gfp_mask: allocation priority 450 * 451 * Make a copy of both an &sk_buff and its data. This is used when the 452 * caller wishes to modify the data and needs a private copy of the 453 * data to alter. Returns %NULL on failure or the pointer to the buffer 454 * on success. The returned buffer has a reference count of 1. 455 * 456 * As by-product this function converts non-linear &sk_buff to linear 457 * one, so that &sk_buff becomes completely private and caller is allowed 458 * to modify all the data of returned buffer. This means that this 459 * function is not recommended for use in circumstances when only 460 * header is going to be modified. Use pskb_copy() instead. 461 */ 462 463 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) 464 { 465 int headerlen = skb->data - skb->head; 466 /* 467 * Allocate the copy buffer 468 */ 469 struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, 470 gfp_mask); 471 if (!n) 472 return NULL; 473 474 /* Set the data pointer */ 475 skb_reserve(n, headerlen); 476 /* Set the tail pointer and length */ 477 skb_put(n, skb->len); 478 n->csum = skb->csum; 479 n->ip_summed = skb->ip_summed; 480 481 if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) 482 BUG(); 483 484 copy_skb_header(n, skb); 485 return n; 486 } 487 488 489 /** 490 * pskb_copy - create copy of an sk_buff with private head. 491 * @skb: buffer to copy 492 * @gfp_mask: allocation priority 493 * 494 * Make a copy of both an &sk_buff and part of its data, located 495 * in header. Fragmented data remain shared. This is used when 496 * the caller wishes to modify only header of &sk_buff and needs 497 * private copy of the header to alter. Returns %NULL on failure 498 * or the pointer to the buffer on success. 499 * The returned buffer has a reference count of 1. 500 */ 501 502 struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) 503 { 504 /* 505 * Allocate the copy buffer 506 */ 507 struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); 508 509 if (!n) 510 goto out; 511 512 /* Set the data pointer */ 513 skb_reserve(n, skb->data - skb->head); 514 /* Set the tail pointer and length */ 515 skb_put(n, skb_headlen(skb)); 516 /* Copy the bytes */ 517 memcpy(n->data, skb->data, n->len); 518 n->csum = skb->csum; 519 n->ip_summed = skb->ip_summed; 520 521 n->data_len = skb->data_len; 522 n->len = skb->len; 523 524 if (skb_shinfo(skb)->nr_frags) { 525 int i; 526 527 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 528 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 529 get_page(skb_shinfo(n)->frags[i].page); 530 } 531 skb_shinfo(n)->nr_frags = i; 532 } 533 534 if (skb_shinfo(skb)->frag_list) { 535 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 536 skb_clone_fraglist(n); 537 } 538 539 copy_skb_header(n, skb); 540 out: 541 return n; 542 } 543 544 /** 545 * pskb_expand_head - reallocate header of &sk_buff 546 * @skb: buffer to reallocate 547 * @nhead: room to add at head 548 * @ntail: room to add at tail 549 * @gfp_mask: allocation priority 550 * 551 * Expands (or creates identical copy, if &nhead and &ntail are zero) 552 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 553 * reference count of 1. Returns zero in the case of success or error, 554 * if expansion failed. In the last case, &sk_buff is not changed. 555 * 556 * All the pointers pointing into skb header may change and must be 557 * reloaded after call to this function. 558 */ 559 560 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) 561 { 562 int i; 563 u8 *data; 564 int size = nhead + (skb->end - skb->head) + ntail; 565 long off; 566 567 if (skb_shared(skb)) 568 BUG(); 569 570 size = SKB_DATA_ALIGN(size); 571 572 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 573 if (!data) 574 goto nodata; 575 576 /* Copy only real data... and, alas, header. This should be 577 * optimized for the cases when header is void. */ 578 memcpy(data + nhead, skb->head, skb->tail - skb->head); 579 memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); 580 581 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 582 get_page(skb_shinfo(skb)->frags[i].page); 583 584 if (skb_shinfo(skb)->frag_list) 585 skb_clone_fraglist(skb); 586 587 skb_release_data(skb); 588 589 off = (data + nhead) - skb->head; 590 591 skb->head = data; 592 skb->end = data + size; 593 skb->data += off; 594 skb->tail += off; 595 skb->mac.raw += off; 596 skb->h.raw += off; 597 skb->nh.raw += off; 598 skb->cloned = 0; 599 skb->nohdr = 0; 600 atomic_set(&skb_shinfo(skb)->dataref, 1); 601 return 0; 602 603 nodata: 604 return -ENOMEM; 605 } 606 607 /* Make private copy of skb with writable head and some headroom */ 608 609 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 610 { 611 struct sk_buff *skb2; 612 int delta = headroom - skb_headroom(skb); 613 614 if (delta <= 0) 615 skb2 = pskb_copy(skb, GFP_ATOMIC); 616 else { 617 skb2 = skb_clone(skb, GFP_ATOMIC); 618 if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, 619 GFP_ATOMIC)) { 620 kfree_skb(skb2); 621 skb2 = NULL; 622 } 623 } 624 return skb2; 625 } 626 627 628 /** 629 * skb_copy_expand - copy and expand sk_buff 630 * @skb: buffer to copy 631 * @newheadroom: new free bytes at head 632 * @newtailroom: new free bytes at tail 633 * @gfp_mask: allocation priority 634 * 635 * Make a copy of both an &sk_buff and its data and while doing so 636 * allocate additional space. 637 * 638 * This is used when the caller wishes to modify the data and needs a 639 * private copy of the data to alter as well as more space for new fields. 640 * Returns %NULL on failure or the pointer to the buffer 641 * on success. The returned buffer has a reference count of 1. 642 * 643 * You must pass %GFP_ATOMIC as the allocation priority if this function 644 * is called from an interrupt. 645 * 646 * BUG ALERT: ip_summed is not copied. Why does this work? Is it used 647 * only by netfilter in the cases when checksum is recalculated? --ANK 648 */ 649 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 650 int newheadroom, int newtailroom, int gfp_mask) 651 { 652 /* 653 * Allocate the copy buffer 654 */ 655 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 656 gfp_mask); 657 int head_copy_len, head_copy_off; 658 659 if (!n) 660 return NULL; 661 662 skb_reserve(n, newheadroom); 663 664 /* Set the tail pointer and length */ 665 skb_put(n, skb->len); 666 667 head_copy_len = skb_headroom(skb); 668 head_copy_off = 0; 669 if (newheadroom <= head_copy_len) 670 head_copy_len = newheadroom; 671 else 672 head_copy_off = newheadroom - head_copy_len; 673 674 /* Copy the linear header and data. */ 675 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, 676 skb->len + head_copy_len)) 677 BUG(); 678 679 copy_skb_header(n, skb); 680 681 return n; 682 } 683 684 /** 685 * skb_pad - zero pad the tail of an skb 686 * @skb: buffer to pad 687 * @pad: space to pad 688 * 689 * Ensure that a buffer is followed by a padding area that is zero 690 * filled. Used by network drivers which may DMA or transfer data 691 * beyond the buffer end onto the wire. 692 * 693 * May return NULL in out of memory cases. 694 */ 695 696 struct sk_buff *skb_pad(struct sk_buff *skb, int pad) 697 { 698 struct sk_buff *nskb; 699 700 /* If the skbuff is non linear tailroom is always zero.. */ 701 if (skb_tailroom(skb) >= pad) { 702 memset(skb->data+skb->len, 0, pad); 703 return skb; 704 } 705 706 nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); 707 kfree_skb(skb); 708 if (nskb) 709 memset(nskb->data+nskb->len, 0, pad); 710 return nskb; 711 } 712 713 /* Trims skb to length len. It can change skb pointers, if "realloc" is 1. 714 * If realloc==0 and trimming is impossible without change of data, 715 * it is BUG(). 716 */ 717 718 int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) 719 { 720 int offset = skb_headlen(skb); 721 int nfrags = skb_shinfo(skb)->nr_frags; 722 int i; 723 724 for (i = 0; i < nfrags; i++) { 725 int end = offset + skb_shinfo(skb)->frags[i].size; 726 if (end > len) { 727 if (skb_cloned(skb)) { 728 if (!realloc) 729 BUG(); 730 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 731 return -ENOMEM; 732 } 733 if (len <= offset) { 734 put_page(skb_shinfo(skb)->frags[i].page); 735 skb_shinfo(skb)->nr_frags--; 736 } else { 737 skb_shinfo(skb)->frags[i].size = len - offset; 738 } 739 } 740 offset = end; 741 } 742 743 if (offset < len) { 744 skb->data_len -= skb->len - len; 745 skb->len = len; 746 } else { 747 if (len <= skb_headlen(skb)) { 748 skb->len = len; 749 skb->data_len = 0; 750 skb->tail = skb->data + len; 751 if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) 752 skb_drop_fraglist(skb); 753 } else { 754 skb->data_len -= skb->len - len; 755 skb->len = len; 756 } 757 } 758 759 return 0; 760 } 761 762 /** 763 * __pskb_pull_tail - advance tail of skb header 764 * @skb: buffer to reallocate 765 * @delta: number of bytes to advance tail 766 * 767 * The function makes a sense only on a fragmented &sk_buff, 768 * it expands header moving its tail forward and copying necessary 769 * data from fragmented part. 770 * 771 * &sk_buff MUST have reference count of 1. 772 * 773 * Returns %NULL (and &sk_buff does not change) if pull failed 774 * or value of new tail of skb in the case of success. 775 * 776 * All the pointers pointing into skb header may change and must be 777 * reloaded after call to this function. 778 */ 779 780 /* Moves tail of skb head forward, copying data from fragmented part, 781 * when it is necessary. 782 * 1. It may fail due to malloc failure. 783 * 2. It may change skb pointers. 784 * 785 * It is pretty complicated. Luckily, it is called only in exceptional cases. 786 */ 787 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) 788 { 789 /* If skb has not enough free space at tail, get new one 790 * plus 128 bytes for future expansions. If we have enough 791 * room at tail, reallocate without expansion only if skb is cloned. 792 */ 793 int i, k, eat = (skb->tail + delta) - skb->end; 794 795 if (eat > 0 || skb_cloned(skb)) { 796 if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, 797 GFP_ATOMIC)) 798 return NULL; 799 } 800 801 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 802 BUG(); 803 804 /* Optimization: no fragments, no reasons to preestimate 805 * size of pulled pages. Superb. 806 */ 807 if (!skb_shinfo(skb)->frag_list) 808 goto pull_pages; 809 810 /* Estimate size of pulled pages. */ 811 eat = delta; 812 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 813 if (skb_shinfo(skb)->frags[i].size >= eat) 814 goto pull_pages; 815 eat -= skb_shinfo(skb)->frags[i].size; 816 } 817 818 /* If we need update frag list, we are in troubles. 819 * Certainly, it possible to add an offset to skb data, 820 * but taking into account that pulling is expected to 821 * be very rare operation, it is worth to fight against 822 * further bloating skb head and crucify ourselves here instead. 823 * Pure masohism, indeed. 8)8) 824 */ 825 if (eat) { 826 struct sk_buff *list = skb_shinfo(skb)->frag_list; 827 struct sk_buff *clone = NULL; 828 struct sk_buff *insp = NULL; 829 830 do { 831 if (!list) 832 BUG(); 833 834 if (list->len <= eat) { 835 /* Eaten as whole. */ 836 eat -= list->len; 837 list = list->next; 838 insp = list; 839 } else { 840 /* Eaten partially. */ 841 842 if (skb_shared(list)) { 843 /* Sucks! We need to fork list. :-( */ 844 clone = skb_clone(list, GFP_ATOMIC); 845 if (!clone) 846 return NULL; 847 insp = list->next; 848 list = clone; 849 } else { 850 /* This may be pulled without 851 * problems. */ 852 insp = list; 853 } 854 if (!pskb_pull(list, eat)) { 855 if (clone) 856 kfree_skb(clone); 857 return NULL; 858 } 859 break; 860 } 861 } while (eat); 862 863 /* Free pulled out fragments. */ 864 while ((list = skb_shinfo(skb)->frag_list) != insp) { 865 skb_shinfo(skb)->frag_list = list->next; 866 kfree_skb(list); 867 } 868 /* And insert new clone at head. */ 869 if (clone) { 870 clone->next = list; 871 skb_shinfo(skb)->frag_list = clone; 872 } 873 } 874 /* Success! Now we may commit changes to skb data. */ 875 876 pull_pages: 877 eat = delta; 878 k = 0; 879 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 880 if (skb_shinfo(skb)->frags[i].size <= eat) { 881 put_page(skb_shinfo(skb)->frags[i].page); 882 eat -= skb_shinfo(skb)->frags[i].size; 883 } else { 884 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 885 if (eat) { 886 skb_shinfo(skb)->frags[k].page_offset += eat; 887 skb_shinfo(skb)->frags[k].size -= eat; 888 eat = 0; 889 } 890 k++; 891 } 892 } 893 skb_shinfo(skb)->nr_frags = k; 894 895 skb->tail += delta; 896 skb->data_len -= delta; 897 898 return skb->tail; 899 } 900 901 /* Copy some data bits from skb to kernel buffer. */ 902 903 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 904 { 905 int i, copy; 906 int start = skb_headlen(skb); 907 908 if (offset > (int)skb->len - len) 909 goto fault; 910 911 /* Copy header. */ 912 if ((copy = start - offset) > 0) { 913 if (copy > len) 914 copy = len; 915 memcpy(to, skb->data + offset, copy); 916 if ((len -= copy) == 0) 917 return 0; 918 offset += copy; 919 to += copy; 920 } 921 922 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 923 int end; 924 925 BUG_TRAP(start <= offset + len); 926 927 end = start + skb_shinfo(skb)->frags[i].size; 928 if ((copy = end - offset) > 0) { 929 u8 *vaddr; 930 931 if (copy > len) 932 copy = len; 933 934 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 935 memcpy(to, 936 vaddr + skb_shinfo(skb)->frags[i].page_offset+ 937 offset - start, copy); 938 kunmap_skb_frag(vaddr); 939 940 if ((len -= copy) == 0) 941 return 0; 942 offset += copy; 943 to += copy; 944 } 945 start = end; 946 } 947 948 if (skb_shinfo(skb)->frag_list) { 949 struct sk_buff *list = skb_shinfo(skb)->frag_list; 950 951 for (; list; list = list->next) { 952 int end; 953 954 BUG_TRAP(start <= offset + len); 955 956 end = start + list->len; 957 if ((copy = end - offset) > 0) { 958 if (copy > len) 959 copy = len; 960 if (skb_copy_bits(list, offset - start, 961 to, copy)) 962 goto fault; 963 if ((len -= copy) == 0) 964 return 0; 965 offset += copy; 966 to += copy; 967 } 968 start = end; 969 } 970 } 971 if (!len) 972 return 0; 973 974 fault: 975 return -EFAULT; 976 } 977 978 /** 979 * skb_store_bits - store bits from kernel buffer to skb 980 * @skb: destination buffer 981 * @offset: offset in destination 982 * @from: source buffer 983 * @len: number of bytes to copy 984 * 985 * Copy the specified number of bytes from the source buffer to the 986 * destination skb. This function handles all the messy bits of 987 * traversing fragment lists and such. 988 */ 989 990 int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len) 991 { 992 int i, copy; 993 int start = skb_headlen(skb); 994 995 if (offset > (int)skb->len - len) 996 goto fault; 997 998 if ((copy = start - offset) > 0) { 999 if (copy > len) 1000 copy = len; 1001 memcpy(skb->data + offset, from, copy); 1002 if ((len -= copy) == 0) 1003 return 0; 1004 offset += copy; 1005 from += copy; 1006 } 1007 1008 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1009 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1010 int end; 1011 1012 BUG_TRAP(start <= offset + len); 1013 1014 end = start + frag->size; 1015 if ((copy = end - offset) > 0) { 1016 u8 *vaddr; 1017 1018 if (copy > len) 1019 copy = len; 1020 1021 vaddr = kmap_skb_frag(frag); 1022 memcpy(vaddr + frag->page_offset + offset - start, 1023 from, copy); 1024 kunmap_skb_frag(vaddr); 1025 1026 if ((len -= copy) == 0) 1027 return 0; 1028 offset += copy; 1029 from += copy; 1030 } 1031 start = end; 1032 } 1033 1034 if (skb_shinfo(skb)->frag_list) { 1035 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1036 1037 for (; list; list = list->next) { 1038 int end; 1039 1040 BUG_TRAP(start <= offset + len); 1041 1042 end = start + list->len; 1043 if ((copy = end - offset) > 0) { 1044 if (copy > len) 1045 copy = len; 1046 if (skb_store_bits(list, offset - start, 1047 from, copy)) 1048 goto fault; 1049 if ((len -= copy) == 0) 1050 return 0; 1051 offset += copy; 1052 from += copy; 1053 } 1054 start = end; 1055 } 1056 } 1057 if (!len) 1058 return 0; 1059 1060 fault: 1061 return -EFAULT; 1062 } 1063 1064 EXPORT_SYMBOL(skb_store_bits); 1065 1066 /* Checksum skb data. */ 1067 1068 unsigned int skb_checksum(const struct sk_buff *skb, int offset, 1069 int len, unsigned int csum) 1070 { 1071 int start = skb_headlen(skb); 1072 int i, copy = start - offset; 1073 int pos = 0; 1074 1075 /* Checksum header. */ 1076 if (copy > 0) { 1077 if (copy > len) 1078 copy = len; 1079 csum = csum_partial(skb->data + offset, copy, csum); 1080 if ((len -= copy) == 0) 1081 return csum; 1082 offset += copy; 1083 pos = copy; 1084 } 1085 1086 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1087 int end; 1088 1089 BUG_TRAP(start <= offset + len); 1090 1091 end = start + skb_shinfo(skb)->frags[i].size; 1092 if ((copy = end - offset) > 0) { 1093 unsigned int csum2; 1094 u8 *vaddr; 1095 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1096 1097 if (copy > len) 1098 copy = len; 1099 vaddr = kmap_skb_frag(frag); 1100 csum2 = csum_partial(vaddr + frag->page_offset + 1101 offset - start, copy, 0); 1102 kunmap_skb_frag(vaddr); 1103 csum = csum_block_add(csum, csum2, pos); 1104 if (!(len -= copy)) 1105 return csum; 1106 offset += copy; 1107 pos += copy; 1108 } 1109 start = end; 1110 } 1111 1112 if (skb_shinfo(skb)->frag_list) { 1113 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1114 1115 for (; list; list = list->next) { 1116 int end; 1117 1118 BUG_TRAP(start <= offset + len); 1119 1120 end = start + list->len; 1121 if ((copy = end - offset) > 0) { 1122 unsigned int csum2; 1123 if (copy > len) 1124 copy = len; 1125 csum2 = skb_checksum(list, offset - start, 1126 copy, 0); 1127 csum = csum_block_add(csum, csum2, pos); 1128 if ((len -= copy) == 0) 1129 return csum; 1130 offset += copy; 1131 pos += copy; 1132 } 1133 start = end; 1134 } 1135 } 1136 if (len) 1137 BUG(); 1138 1139 return csum; 1140 } 1141 1142 /* Both of above in one bottle. */ 1143 1144 unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, 1145 u8 *to, int len, unsigned int csum) 1146 { 1147 int start = skb_headlen(skb); 1148 int i, copy = start - offset; 1149 int pos = 0; 1150 1151 /* Copy header. */ 1152 if (copy > 0) { 1153 if (copy > len) 1154 copy = len; 1155 csum = csum_partial_copy_nocheck(skb->data + offset, to, 1156 copy, csum); 1157 if ((len -= copy) == 0) 1158 return csum; 1159 offset += copy; 1160 to += copy; 1161 pos = copy; 1162 } 1163 1164 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1165 int end; 1166 1167 BUG_TRAP(start <= offset + len); 1168 1169 end = start + skb_shinfo(skb)->frags[i].size; 1170 if ((copy = end - offset) > 0) { 1171 unsigned int csum2; 1172 u8 *vaddr; 1173 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1174 1175 if (copy > len) 1176 copy = len; 1177 vaddr = kmap_skb_frag(frag); 1178 csum2 = csum_partial_copy_nocheck(vaddr + 1179 frag->page_offset + 1180 offset - start, to, 1181 copy, 0); 1182 kunmap_skb_frag(vaddr); 1183 csum = csum_block_add(csum, csum2, pos); 1184 if (!(len -= copy)) 1185 return csum; 1186 offset += copy; 1187 to += copy; 1188 pos += copy; 1189 } 1190 start = end; 1191 } 1192 1193 if (skb_shinfo(skb)->frag_list) { 1194 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1195 1196 for (; list; list = list->next) { 1197 unsigned int csum2; 1198 int end; 1199 1200 BUG_TRAP(start <= offset + len); 1201 1202 end = start + list->len; 1203 if ((copy = end - offset) > 0) { 1204 if (copy > len) 1205 copy = len; 1206 csum2 = skb_copy_and_csum_bits(list, 1207 offset - start, 1208 to, copy, 0); 1209 csum = csum_block_add(csum, csum2, pos); 1210 if ((len -= copy) == 0) 1211 return csum; 1212 offset += copy; 1213 to += copy; 1214 pos += copy; 1215 } 1216 start = end; 1217 } 1218 } 1219 if (len) 1220 BUG(); 1221 return csum; 1222 } 1223 1224 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 1225 { 1226 unsigned int csum; 1227 long csstart; 1228 1229 if (skb->ip_summed == CHECKSUM_HW) 1230 csstart = skb->h.raw - skb->data; 1231 else 1232 csstart = skb_headlen(skb); 1233 1234 if (csstart > skb_headlen(skb)) 1235 BUG(); 1236 1237 memcpy(to, skb->data, csstart); 1238 1239 csum = 0; 1240 if (csstart != skb->len) 1241 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, 1242 skb->len - csstart, 0); 1243 1244 if (skb->ip_summed == CHECKSUM_HW) { 1245 long csstuff = csstart + skb->csum; 1246 1247 *((unsigned short *)(to + csstuff)) = csum_fold(csum); 1248 } 1249 } 1250 1251 /** 1252 * skb_dequeue - remove from the head of the queue 1253 * @list: list to dequeue from 1254 * 1255 * Remove the head of the list. The list lock is taken so the function 1256 * may be used safely with other locking list functions. The head item is 1257 * returned or %NULL if the list is empty. 1258 */ 1259 1260 struct sk_buff *skb_dequeue(struct sk_buff_head *list) 1261 { 1262 unsigned long flags; 1263 struct sk_buff *result; 1264 1265 spin_lock_irqsave(&list->lock, flags); 1266 result = __skb_dequeue(list); 1267 spin_unlock_irqrestore(&list->lock, flags); 1268 return result; 1269 } 1270 1271 /** 1272 * skb_dequeue_tail - remove from the tail of the queue 1273 * @list: list to dequeue from 1274 * 1275 * Remove the tail of the list. The list lock is taken so the function 1276 * may be used safely with other locking list functions. The tail item is 1277 * returned or %NULL if the list is empty. 1278 */ 1279 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 1280 { 1281 unsigned long flags; 1282 struct sk_buff *result; 1283 1284 spin_lock_irqsave(&list->lock, flags); 1285 result = __skb_dequeue_tail(list); 1286 spin_unlock_irqrestore(&list->lock, flags); 1287 return result; 1288 } 1289 1290 /** 1291 * skb_queue_purge - empty a list 1292 * @list: list to empty 1293 * 1294 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1295 * the list and one reference dropped. This function takes the list 1296 * lock and is atomic with respect to other list locking functions. 1297 */ 1298 void skb_queue_purge(struct sk_buff_head *list) 1299 { 1300 struct sk_buff *skb; 1301 while ((skb = skb_dequeue(list)) != NULL) 1302 kfree_skb(skb); 1303 } 1304 1305 /** 1306 * skb_queue_head - queue a buffer at the list head 1307 * @list: list to use 1308 * @newsk: buffer to queue 1309 * 1310 * Queue a buffer at the start of the list. This function takes the 1311 * list lock and can be used safely with other locking &sk_buff functions 1312 * safely. 1313 * 1314 * A buffer cannot be placed on two lists at the same time. 1315 */ 1316 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 1317 { 1318 unsigned long flags; 1319 1320 spin_lock_irqsave(&list->lock, flags); 1321 __skb_queue_head(list, newsk); 1322 spin_unlock_irqrestore(&list->lock, flags); 1323 } 1324 1325 /** 1326 * skb_queue_tail - queue a buffer at the list tail 1327 * @list: list to use 1328 * @newsk: buffer to queue 1329 * 1330 * Queue a buffer at the tail of the list. This function takes the 1331 * list lock and can be used safely with other locking &sk_buff functions 1332 * safely. 1333 * 1334 * A buffer cannot be placed on two lists at the same time. 1335 */ 1336 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 1337 { 1338 unsigned long flags; 1339 1340 spin_lock_irqsave(&list->lock, flags); 1341 __skb_queue_tail(list, newsk); 1342 spin_unlock_irqrestore(&list->lock, flags); 1343 } 1344 /** 1345 * skb_unlink - remove a buffer from a list 1346 * @skb: buffer to remove 1347 * 1348 * Place a packet after a given packet in a list. The list locks are taken 1349 * and this function is atomic with respect to other list locked calls 1350 * 1351 * Works even without knowing the list it is sitting on, which can be 1352 * handy at times. It also means that THE LIST MUST EXIST when you 1353 * unlink. Thus a list must have its contents unlinked before it is 1354 * destroyed. 1355 */ 1356 void skb_unlink(struct sk_buff *skb) 1357 { 1358 struct sk_buff_head *list = skb->list; 1359 1360 if (list) { 1361 unsigned long flags; 1362 1363 spin_lock_irqsave(&list->lock, flags); 1364 if (skb->list == list) 1365 __skb_unlink(skb, skb->list); 1366 spin_unlock_irqrestore(&list->lock, flags); 1367 } 1368 } 1369 1370 1371 /** 1372 * skb_append - append a buffer 1373 * @old: buffer to insert after 1374 * @newsk: buffer to insert 1375 * 1376 * Place a packet after a given packet in a list. The list locks are taken 1377 * and this function is atomic with respect to other list locked calls. 1378 * A buffer cannot be placed on two lists at the same time. 1379 */ 1380 1381 void skb_append(struct sk_buff *old, struct sk_buff *newsk) 1382 { 1383 unsigned long flags; 1384 1385 spin_lock_irqsave(&old->list->lock, flags); 1386 __skb_append(old, newsk); 1387 spin_unlock_irqrestore(&old->list->lock, flags); 1388 } 1389 1390 1391 /** 1392 * skb_insert - insert a buffer 1393 * @old: buffer to insert before 1394 * @newsk: buffer to insert 1395 * 1396 * Place a packet before a given packet in a list. The list locks are taken 1397 * and this function is atomic with respect to other list locked calls 1398 * A buffer cannot be placed on two lists at the same time. 1399 */ 1400 1401 void skb_insert(struct sk_buff *old, struct sk_buff *newsk) 1402 { 1403 unsigned long flags; 1404 1405 spin_lock_irqsave(&old->list->lock, flags); 1406 __skb_insert(newsk, old->prev, old, old->list); 1407 spin_unlock_irqrestore(&old->list->lock, flags); 1408 } 1409 1410 #if 0 1411 /* 1412 * Tune the memory allocator for a new MTU size. 1413 */ 1414 void skb_add_mtu(int mtu) 1415 { 1416 /* Must match allocation in alloc_skb */ 1417 mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); 1418 1419 kmem_add_cache_size(mtu); 1420 } 1421 #endif 1422 1423 static inline void skb_split_inside_header(struct sk_buff *skb, 1424 struct sk_buff* skb1, 1425 const u32 len, const int pos) 1426 { 1427 int i; 1428 1429 memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len); 1430 1431 /* And move data appendix as is. */ 1432 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1433 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; 1434 1435 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; 1436 skb_shinfo(skb)->nr_frags = 0; 1437 skb1->data_len = skb->data_len; 1438 skb1->len += skb1->data_len; 1439 skb->data_len = 0; 1440 skb->len = len; 1441 skb->tail = skb->data + len; 1442 } 1443 1444 static inline void skb_split_no_header(struct sk_buff *skb, 1445 struct sk_buff* skb1, 1446 const u32 len, int pos) 1447 { 1448 int i, k = 0; 1449 const int nfrags = skb_shinfo(skb)->nr_frags; 1450 1451 skb_shinfo(skb)->nr_frags = 0; 1452 skb1->len = skb1->data_len = skb->len - len; 1453 skb->len = len; 1454 skb->data_len = len - pos; 1455 1456 for (i = 0; i < nfrags; i++) { 1457 int size = skb_shinfo(skb)->frags[i].size; 1458 1459 if (pos + size > len) { 1460 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; 1461 1462 if (pos < len) { 1463 /* Split frag. 1464 * We have two variants in this case: 1465 * 1. Move all the frag to the second 1466 * part, if it is possible. F.e. 1467 * this approach is mandatory for TUX, 1468 * where splitting is expensive. 1469 * 2. Split is accurately. We make this. 1470 */ 1471 get_page(skb_shinfo(skb)->frags[i].page); 1472 skb_shinfo(skb1)->frags[0].page_offset += len - pos; 1473 skb_shinfo(skb1)->frags[0].size -= len - pos; 1474 skb_shinfo(skb)->frags[i].size = len - pos; 1475 skb_shinfo(skb)->nr_frags++; 1476 } 1477 k++; 1478 } else 1479 skb_shinfo(skb)->nr_frags++; 1480 pos += size; 1481 } 1482 skb_shinfo(skb1)->nr_frags = k; 1483 } 1484 1485 /** 1486 * skb_split - Split fragmented skb to two parts at length len. 1487 * @skb: the buffer to split 1488 * @skb1: the buffer to receive the second part 1489 * @len: new length for skb 1490 */ 1491 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) 1492 { 1493 int pos = skb_headlen(skb); 1494 1495 if (len < pos) /* Split line is inside header. */ 1496 skb_split_inside_header(skb, skb1, len, pos); 1497 else /* Second chunk has no header, nothing to copy. */ 1498 skb_split_no_header(skb, skb1, len, pos); 1499 } 1500 1501 /** 1502 * skb_prepare_seq_read - Prepare a sequential read of skb data 1503 * @skb: the buffer to read 1504 * @from: lower offset of data to be read 1505 * @to: upper offset of data to be read 1506 * @st: state variable 1507 * 1508 * Initializes the specified state variable. Must be called before 1509 * invoking skb_seq_read() for the first time. 1510 */ 1511 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, 1512 unsigned int to, struct skb_seq_state *st) 1513 { 1514 st->lower_offset = from; 1515 st->upper_offset = to; 1516 st->root_skb = st->cur_skb = skb; 1517 st->frag_idx = st->stepped_offset = 0; 1518 st->frag_data = NULL; 1519 } 1520 1521 /** 1522 * skb_seq_read - Sequentially read skb data 1523 * @consumed: number of bytes consumed by the caller so far 1524 * @data: destination pointer for data to be returned 1525 * @st: state variable 1526 * 1527 * Reads a block of skb data at &consumed relative to the 1528 * lower offset specified to skb_prepare_seq_read(). Assigns 1529 * the head of the data block to &data and returns the length 1530 * of the block or 0 if the end of the skb data or the upper 1531 * offset has been reached. 1532 * 1533 * The caller is not required to consume all of the data 1534 * returned, i.e. &consumed is typically set to the number 1535 * of bytes already consumed and the next call to 1536 * skb_seq_read() will return the remaining part of the block. 1537 * 1538 * Note: The size of each block of data returned can be arbitary, 1539 * this limitation is the cost for zerocopy seqeuental 1540 * reads of potentially non linear data. 1541 * 1542 * Note: Fragment lists within fragments are not implemented 1543 * at the moment, state->root_skb could be replaced with 1544 * a stack for this purpose. 1545 */ 1546 unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 1547 struct skb_seq_state *st) 1548 { 1549 unsigned int block_limit, abs_offset = consumed + st->lower_offset; 1550 skb_frag_t *frag; 1551 1552 if (unlikely(abs_offset >= st->upper_offset)) 1553 return 0; 1554 1555 next_skb: 1556 block_limit = skb_headlen(st->cur_skb); 1557 1558 if (abs_offset < block_limit) { 1559 *data = st->cur_skb->data + abs_offset; 1560 return block_limit - abs_offset; 1561 } 1562 1563 if (st->frag_idx == 0 && !st->frag_data) 1564 st->stepped_offset += skb_headlen(st->cur_skb); 1565 1566 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { 1567 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 1568 block_limit = frag->size + st->stepped_offset; 1569 1570 if (abs_offset < block_limit) { 1571 if (!st->frag_data) 1572 st->frag_data = kmap_skb_frag(frag); 1573 1574 *data = (u8 *) st->frag_data + frag->page_offset + 1575 (abs_offset - st->stepped_offset); 1576 1577 return block_limit - abs_offset; 1578 } 1579 1580 if (st->frag_data) { 1581 kunmap_skb_frag(st->frag_data); 1582 st->frag_data = NULL; 1583 } 1584 1585 st->frag_idx++; 1586 st->stepped_offset += frag->size; 1587 } 1588 1589 if (st->cur_skb->next) { 1590 st->cur_skb = st->cur_skb->next; 1591 st->frag_idx = 0; 1592 goto next_skb; 1593 } else if (st->root_skb == st->cur_skb && 1594 skb_shinfo(st->root_skb)->frag_list) { 1595 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 1596 goto next_skb; 1597 } 1598 1599 return 0; 1600 } 1601 1602 /** 1603 * skb_abort_seq_read - Abort a sequential read of skb data 1604 * @st: state variable 1605 * 1606 * Must be called if skb_seq_read() was not called until it 1607 * returned 0. 1608 */ 1609 void skb_abort_seq_read(struct skb_seq_state *st) 1610 { 1611 if (st->frag_data) 1612 kunmap_skb_frag(st->frag_data); 1613 } 1614 1615 #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) 1616 1617 static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, 1618 struct ts_config *conf, 1619 struct ts_state *state) 1620 { 1621 return skb_seq_read(offset, text, TS_SKB_CB(state)); 1622 } 1623 1624 static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) 1625 { 1626 skb_abort_seq_read(TS_SKB_CB(state)); 1627 } 1628 1629 /** 1630 * skb_find_text - Find a text pattern in skb data 1631 * @skb: the buffer to look in 1632 * @from: search offset 1633 * @to: search limit 1634 * @config: textsearch configuration 1635 * @state: uninitialized textsearch state variable 1636 * 1637 * Finds a pattern in the skb data according to the specified 1638 * textsearch configuration. Use textsearch_next() to retrieve 1639 * subsequent occurrences of the pattern. Returns the offset 1640 * to the first occurrence or UINT_MAX if no match was found. 1641 */ 1642 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, 1643 unsigned int to, struct ts_config *config, 1644 struct ts_state *state) 1645 { 1646 config->get_next_block = skb_ts_get_next_block; 1647 config->finish = skb_ts_finish; 1648 1649 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); 1650 1651 return textsearch_find(config, state); 1652 } 1653 1654 void __init skb_init(void) 1655 { 1656 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 1657 sizeof(struct sk_buff), 1658 0, 1659 SLAB_HWCACHE_ALIGN, 1660 NULL, NULL); 1661 if (!skbuff_head_cache) 1662 panic("cannot create skbuff cache"); 1663 } 1664 1665 EXPORT_SYMBOL(___pskb_trim); 1666 EXPORT_SYMBOL(__kfree_skb); 1667 EXPORT_SYMBOL(__pskb_pull_tail); 1668 EXPORT_SYMBOL(alloc_skb); 1669 EXPORT_SYMBOL(pskb_copy); 1670 EXPORT_SYMBOL(pskb_expand_head); 1671 EXPORT_SYMBOL(skb_checksum); 1672 EXPORT_SYMBOL(skb_clone); 1673 EXPORT_SYMBOL(skb_clone_fraglist); 1674 EXPORT_SYMBOL(skb_copy); 1675 EXPORT_SYMBOL(skb_copy_and_csum_bits); 1676 EXPORT_SYMBOL(skb_copy_and_csum_dev); 1677 EXPORT_SYMBOL(skb_copy_bits); 1678 EXPORT_SYMBOL(skb_copy_expand); 1679 EXPORT_SYMBOL(skb_over_panic); 1680 EXPORT_SYMBOL(skb_pad); 1681 EXPORT_SYMBOL(skb_realloc_headroom); 1682 EXPORT_SYMBOL(skb_under_panic); 1683 EXPORT_SYMBOL(skb_dequeue); 1684 EXPORT_SYMBOL(skb_dequeue_tail); 1685 EXPORT_SYMBOL(skb_insert); 1686 EXPORT_SYMBOL(skb_queue_purge); 1687 EXPORT_SYMBOL(skb_queue_head); 1688 EXPORT_SYMBOL(skb_queue_tail); 1689 EXPORT_SYMBOL(skb_unlink); 1690 EXPORT_SYMBOL(skb_append); 1691 EXPORT_SYMBOL(skb_split); 1692 EXPORT_SYMBOL(skb_prepare_seq_read); 1693 EXPORT_SYMBOL(skb_seq_read); 1694 EXPORT_SYMBOL(skb_abort_seq_read); 1695 EXPORT_SYMBOL(skb_find_text); 1696