1 /* 2 * Routines having to do with the 'struct sk_buff' memory handlers. 3 * 4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 5 * Florian La Roche <rzsfl@rz.uni-sb.de> 6 * 7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ 8 * 9 * Fixes: 10 * Alan Cox : Fixed the worst of the load 11 * balancer bugs. 12 * Dave Platt : Interrupt stacking fix. 13 * Richard Kooijman : Timestamp fixes. 14 * Alan Cox : Changed buffer format. 15 * Alan Cox : destructor hook for AF_UNIX etc. 16 * Linus Torvalds : Better skb_clone. 17 * Alan Cox : Added skb_copy. 18 * Alan Cox : Added all the changed routines Linus 19 * only put in the headers 20 * Ray VanTassle : Fixed --skb->lock in free 21 * Alan Cox : skb_copy copy arp field 22 * Andi Kleen : slabified it. 23 * Robert Olsson : Removed skb_head_pool 24 * 25 * NOTE: 26 * The __skb_ routines should be called with interrupts 27 * disabled, or you better be *real* sure that the operation is atomic 28 * with respect to whatever list is being frobbed (e.g. via lock_sock() 29 * or via disabling bottom half handlers, etc). 30 * 31 * This program is free software; you can redistribute it and/or 32 * modify it under the terms of the GNU General Public License 33 * as published by the Free Software Foundation; either version 34 * 2 of the License, or (at your option) any later version. 35 */ 36 37 /* 38 * The functions in this file will not compile correctly with gcc 2.4.x 39 */ 40 41 #include <linux/config.h> 42 #include <linux/module.h> 43 #include <linux/types.h> 44 #include <linux/kernel.h> 45 #include <linux/sched.h> 46 #include <linux/mm.h> 47 #include <linux/interrupt.h> 48 #include <linux/in.h> 49 #include <linux/inet.h> 50 #include <linux/slab.h> 51 #include <linux/netdevice.h> 52 #ifdef CONFIG_NET_CLS_ACT 53 #include <net/pkt_sched.h> 54 #endif 55 #include <linux/string.h> 56 #include <linux/skbuff.h> 57 #include <linux/cache.h> 58 #include <linux/rtnetlink.h> 59 #include <linux/init.h> 60 #include <linux/highmem.h> 61 62 #include <net/protocol.h> 63 #include <net/dst.h> 64 #include <net/sock.h> 65 #include <net/checksum.h> 66 #include <net/xfrm.h> 67 68 #include <asm/uaccess.h> 69 #include <asm/system.h> 70 71 static kmem_cache_t *skbuff_head_cache __read_mostly; 72 static kmem_cache_t *skbuff_fclone_cache __read_mostly; 73 74 /* 75 * Keep out-of-line to prevent kernel bloat. 76 * __builtin_return_address is not used because it is not always 77 * reliable. 78 */ 79 80 /** 81 * skb_over_panic - private function 82 * @skb: buffer 83 * @sz: size 84 * @here: address 85 * 86 * Out of line support code for skb_put(). Not user callable. 87 */ 88 void skb_over_panic(struct sk_buff *skb, int sz, void *here) 89 { 90 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 91 "data:%p tail:%p end:%p dev:%s\n", 92 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 93 skb->dev ? skb->dev->name : "<NULL>"); 94 BUG(); 95 } 96 97 /** 98 * skb_under_panic - private function 99 * @skb: buffer 100 * @sz: size 101 * @here: address 102 * 103 * Out of line support code for skb_push(). Not user callable. 104 */ 105 106 void skb_under_panic(struct sk_buff *skb, int sz, void *here) 107 { 108 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 109 "data:%p tail:%p end:%p dev:%s\n", 110 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 111 skb->dev ? skb->dev->name : "<NULL>"); 112 BUG(); 113 } 114 115 /* Allocate a new skbuff. 
We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */

/**
 *	__alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *	@fclone: allocate from fclone cache instead of head cache
 *		and allocate a cloned (child) skb
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and a
 *	tail room of @size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
			    int fclone)
{
	struct sk_buff *skb;
	u8 *data;

	/* Get the HEAD */
	if (fclone)
		skb = kmem_cache_alloc(skbuff_fclone_cache,
				       gfp_mask & ~__GFP_DMA);
	else
		skb = kmem_cache_alloc(skbuff_head_cache,
				       gfp_mask & ~__GFP_DMA);

	if (!skb)
		goto out;

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		goto nodata;

	memset(skb, 0, offsetof(struct sk_buff, truesize));
	skb->truesize = size + sizeof(struct sk_buff);
	atomic_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end  = data + size;
	if (fclone) {
		struct sk_buff *child = skb + 1;
		atomic_t *fclone_ref = (atomic_t *) (child + 1);

		skb->fclone = SKB_FCLONE_ORIG;
		atomic_set(fclone_ref, 1);

		child->fclone = SKB_FCLONE_UNAVAILABLE;
	}
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags  = 0;
	skb_shinfo(skb)->tso_size = 0;
	skb_shinfo(skb)->tso_segs = 0;
	skb_shinfo(skb)->frag_list = NULL;
	skb_shinfo(skb)->ufo_size = 0;
	skb_shinfo(skb)->ip6_frag_id = 0;
out:
	return skb;
nodata:
	/* Return the head to the cache it was actually allocated from; an
	 * fclone head must not be freed into skbuff_head_cache.
	 */
	kmem_cache_free(fclone ? skbuff_fclone_cache : skbuff_head_cache, skb);
	skb = NULL;
	goto out;
}

/**
 *	alloc_skb_from_cache	-	allocate a network buffer
 *	@cp: kmem_cache from which to allocate the data area
 *		(object size must be big enough for @size bytes + skb overheads)
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *
 *	Allocate a new &sk_buff. The returned buffer has no headroom and
 *	tail room of @size bytes. The object has a reference count of one.
 *	The return is the buffer. On a failure the return is %NULL.
 *
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
				     unsigned int size,
				     gfp_t gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	/* Get the HEAD */
	skb = kmem_cache_alloc(skbuff_head_cache,
			       gfp_mask & ~__GFP_DMA);
	if (!skb)
		goto out;

	/* Get the DATA.
*/ 217 size = SKB_DATA_ALIGN(size); 218 data = kmem_cache_alloc(cp, gfp_mask); 219 if (!data) 220 goto nodata; 221 222 memset(skb, 0, offsetof(struct sk_buff, truesize)); 223 skb->truesize = size + sizeof(struct sk_buff); 224 atomic_set(&skb->users, 1); 225 skb->head = data; 226 skb->data = data; 227 skb->tail = data; 228 skb->end = data + size; 229 230 atomic_set(&(skb_shinfo(skb)->dataref), 1); 231 skb_shinfo(skb)->nr_frags = 0; 232 skb_shinfo(skb)->tso_size = 0; 233 skb_shinfo(skb)->tso_segs = 0; 234 skb_shinfo(skb)->frag_list = NULL; 235 out: 236 return skb; 237 nodata: 238 kmem_cache_free(skbuff_head_cache, skb); 239 skb = NULL; 240 goto out; 241 } 242 243 244 static void skb_drop_fraglist(struct sk_buff *skb) 245 { 246 struct sk_buff *list = skb_shinfo(skb)->frag_list; 247 248 skb_shinfo(skb)->frag_list = NULL; 249 250 do { 251 struct sk_buff *this = list; 252 list = list->next; 253 kfree_skb(this); 254 } while (list); 255 } 256 257 static void skb_clone_fraglist(struct sk_buff *skb) 258 { 259 struct sk_buff *list; 260 261 for (list = skb_shinfo(skb)->frag_list; list; list = list->next) 262 skb_get(list); 263 } 264 265 void skb_release_data(struct sk_buff *skb) 266 { 267 if (!skb->cloned || 268 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 269 &skb_shinfo(skb)->dataref)) { 270 if (skb_shinfo(skb)->nr_frags) { 271 int i; 272 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 273 put_page(skb_shinfo(skb)->frags[i].page); 274 } 275 276 if (skb_shinfo(skb)->frag_list) 277 skb_drop_fraglist(skb); 278 279 kfree(skb->head); 280 } 281 } 282 283 /* 284 * Free an skbuff by memory without cleaning the state. 285 */ 286 void kfree_skbmem(struct sk_buff *skb) 287 { 288 struct sk_buff *other; 289 atomic_t *fclone_ref; 290 291 skb_release_data(skb); 292 switch (skb->fclone) { 293 case SKB_FCLONE_UNAVAILABLE: 294 kmem_cache_free(skbuff_head_cache, skb); 295 break; 296 297 case SKB_FCLONE_ORIG: 298 fclone_ref = (atomic_t *) (skb + 2); 299 if (atomic_dec_and_test(fclone_ref)) 300 kmem_cache_free(skbuff_fclone_cache, skb); 301 break; 302 303 case SKB_FCLONE_CLONE: 304 fclone_ref = (atomic_t *) (skb + 1); 305 other = skb - 1; 306 307 /* The clone portion is available for 308 * fast-cloning again. 309 */ 310 skb->fclone = SKB_FCLONE_UNAVAILABLE; 311 312 if (atomic_dec_and_test(fclone_ref)) 313 kmem_cache_free(skbuff_fclone_cache, other); 314 break; 315 }; 316 } 317 318 /** 319 * __kfree_skb - private function 320 * @skb: buffer 321 * 322 * Free an sk_buff. Release anything attached to the buffer. 323 * Clean the state. This is an internal helper function. Users should 324 * always call kfree_skb 325 */ 326 327 void __kfree_skb(struct sk_buff *skb) 328 { 329 dst_release(skb->dst); 330 #ifdef CONFIG_XFRM 331 secpath_put(skb->sp); 332 #endif 333 if (skb->destructor) { 334 WARN_ON(in_irq()); 335 skb->destructor(skb); 336 } 337 #ifdef CONFIG_NETFILTER 338 nf_conntrack_put(skb->nfct); 339 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 340 nf_conntrack_put_reasm(skb->nfct_reasm); 341 #endif 342 #ifdef CONFIG_BRIDGE_NETFILTER 343 nf_bridge_put(skb->nf_bridge); 344 #endif 345 #endif 346 /* XXX: IS this still necessary? - JHS */ 347 #ifdef CONFIG_NET_SCHED 348 skb->tc_index = 0; 349 #ifdef CONFIG_NET_CLS_ACT 350 skb->tc_verd = 0; 351 #endif 352 #endif 353 354 kfree_skbmem(skb); 355 } 356 357 /** 358 * skb_clone - duplicate an sk_buff 359 * @skb: buffer to clone 360 * @gfp_mask: allocation priority 361 * 362 * Duplicate an &sk_buff. The new one is not owned by a socket. 
 *	Both copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt, @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
	struct sk_buff *n;

	n = skb + 1;
	if (skb->fclone == SKB_FCLONE_ORIG &&
	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
		atomic_t *fclone_ref = (atomic_t *) (n + 1);
		n->fclone = SKB_FCLONE_CLONE;
		atomic_inc(fclone_ref);
	} else {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
		n->fclone = SKB_FCLONE_UNAVAILABLE;
	}

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->sk = NULL;
	C(tstamp);
	C(dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	C(protocol);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfct);
	nf_conntrack_get(skb->nfct);
	C(nfctinfo);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	C(nfct_reasm);
	nf_conntrack_get_reasm(skb->nfct_reasm);
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	C(ipvs_property);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	C(nf_bridge);
	nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
	C(input_dev);
#endif

#endif
	C(truesize);
	atomic_set(&n->users, 1);
	C(head);
	C(data);
	C(tail);
	C(end);

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
}
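
/*
 * Illustrative sketch (not part of the original file): the usual way to hand
 * the same packet to a second consumer without touching its payload is to
 * take a clone and leave the data shared.  The helper name and the idea of
 * mirroring to a second device are made-up examples.
 *
 *	static int mirror_to_tap(struct sk_buff *skb, struct net_device *tap)
 *	{
 *		struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 *
 *		if (!clone)
 *			return -ENOMEM;
 *		clone->dev = tap;
 *		dev_queue_xmit(clone);
 *		return 0;
 *	}
 *
 * The original skb is untouched and dev_queue_xmit() consumes the clone.
 * If the second consumer needs to modify the payload, skb_copy() (or
 * pskb_copy() for header-only changes) should be used instead, because a
 * clone shares its data area with the original.
 */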
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 *	Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->sk		= NULL;
	new->dev	= old->dev;
	new->priority	= old->priority;
	new->protocol	= old->protocol;
	new->dst	= dst_clone(old->dst);
#ifdef CONFIG_INET
	new->sp		= secpath_get(old->sp);
#endif
	new->h.raw	= old->h.raw + offset;
	new->nh.raw	= old->nh.raw + offset;
	new->mac.raw	= old->mac.raw + offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	new->local_df	= old->local_df;
	new->fclone	= SKB_FCLONE_UNAVAILABLE;
	new->pkt_type	= old->pkt_type;
	new->tstamp	= old->tstamp;
	new->destructor = NULL;
#ifdef CONFIG_NETFILTER
	new->nfmark	= old->nfmark;
	new->nfct	= old->nfct;
	nf_conntrack_get(old->nfct);
	new->nfctinfo	= old->nfctinfo;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	new->nfct_reasm = old->nfct_reasm;
	nf_conntrack_get_reasm(old->nfct_reasm);
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	new->ipvs_property = old->ipvs_property;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	new->nf_bridge	= old->nf_bridge;
	nf_bridge_get(old->nf_bridge);
#endif
#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
	new->tc_verd = old->tc_verd;
#endif
	new->tc_index	= old->tc_index;
#endif
	atomic_set(&new->users, 1);
	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
}

/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff to a
 *	linear one, so the &sk_buff becomes completely private and the caller
 *	is allowed to modify all the data of the returned buffer. This means
 *	that this function is not recommended when only the header is going
 *	to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
	int headerlen = skb->data - skb->head;
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
				      gfp_mask);
	if (!n)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);
	n->csum	     = skb->csum;
	n->ip_summed = skb->ip_summed;

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}


/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in the header. Fragmented data remains shared. This is used when
 *	the caller wishes to modify only the header of the &sk_buff and
 *	needs a private copy of the header to alter. Returns %NULL on
 *	failure or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
565 */ 566 567 struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 568 { 569 /* 570 * Allocate the copy buffer 571 */ 572 struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); 573 574 if (!n) 575 goto out; 576 577 /* Set the data pointer */ 578 skb_reserve(n, skb->data - skb->head); 579 /* Set the tail pointer and length */ 580 skb_put(n, skb_headlen(skb)); 581 /* Copy the bytes */ 582 memcpy(n->data, skb->data, n->len); 583 n->csum = skb->csum; 584 n->ip_summed = skb->ip_summed; 585 586 n->data_len = skb->data_len; 587 n->len = skb->len; 588 589 if (skb_shinfo(skb)->nr_frags) { 590 int i; 591 592 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 593 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 594 get_page(skb_shinfo(n)->frags[i].page); 595 } 596 skb_shinfo(n)->nr_frags = i; 597 } 598 599 if (skb_shinfo(skb)->frag_list) { 600 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 601 skb_clone_fraglist(n); 602 } 603 604 copy_skb_header(n, skb); 605 out: 606 return n; 607 } 608 609 /** 610 * pskb_expand_head - reallocate header of &sk_buff 611 * @skb: buffer to reallocate 612 * @nhead: room to add at head 613 * @ntail: room to add at tail 614 * @gfp_mask: allocation priority 615 * 616 * Expands (or creates identical copy, if &nhead and &ntail are zero) 617 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 618 * reference count of 1. Returns zero in the case of success or error, 619 * if expansion failed. In the last case, &sk_buff is not changed. 620 * 621 * All the pointers pointing into skb header may change and must be 622 * reloaded after call to this function. 623 */ 624 625 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, 626 gfp_t gfp_mask) 627 { 628 int i; 629 u8 *data; 630 int size = nhead + (skb->end - skb->head) + ntail; 631 long off; 632 633 if (skb_shared(skb)) 634 BUG(); 635 636 size = SKB_DATA_ALIGN(size); 637 638 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 639 if (!data) 640 goto nodata; 641 642 /* Copy only real data... and, alas, header. This should be 643 * optimized for the cases when header is void. 
*/ 644 memcpy(data + nhead, skb->head, skb->tail - skb->head); 645 memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); 646 647 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 648 get_page(skb_shinfo(skb)->frags[i].page); 649 650 if (skb_shinfo(skb)->frag_list) 651 skb_clone_fraglist(skb); 652 653 skb_release_data(skb); 654 655 off = (data + nhead) - skb->head; 656 657 skb->head = data; 658 skb->end = data + size; 659 skb->data += off; 660 skb->tail += off; 661 skb->mac.raw += off; 662 skb->h.raw += off; 663 skb->nh.raw += off; 664 skb->cloned = 0; 665 skb->nohdr = 0; 666 atomic_set(&skb_shinfo(skb)->dataref, 1); 667 return 0; 668 669 nodata: 670 return -ENOMEM; 671 } 672 673 /* Make private copy of skb with writable head and some headroom */ 674 675 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 676 { 677 struct sk_buff *skb2; 678 int delta = headroom - skb_headroom(skb); 679 680 if (delta <= 0) 681 skb2 = pskb_copy(skb, GFP_ATOMIC); 682 else { 683 skb2 = skb_clone(skb, GFP_ATOMIC); 684 if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, 685 GFP_ATOMIC)) { 686 kfree_skb(skb2); 687 skb2 = NULL; 688 } 689 } 690 return skb2; 691 } 692 693 694 /** 695 * skb_copy_expand - copy and expand sk_buff 696 * @skb: buffer to copy 697 * @newheadroom: new free bytes at head 698 * @newtailroom: new free bytes at tail 699 * @gfp_mask: allocation priority 700 * 701 * Make a copy of both an &sk_buff and its data and while doing so 702 * allocate additional space. 703 * 704 * This is used when the caller wishes to modify the data and needs a 705 * private copy of the data to alter as well as more space for new fields. 706 * Returns %NULL on failure or the pointer to the buffer 707 * on success. The returned buffer has a reference count of 1. 708 * 709 * You must pass %GFP_ATOMIC as the allocation priority if this function 710 * is called from an interrupt. 711 * 712 * BUG ALERT: ip_summed is not copied. Why does this work? Is it used 713 * only by netfilter in the cases when checksum is recalculated? --ANK 714 */ 715 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 716 int newheadroom, int newtailroom, 717 gfp_t gfp_mask) 718 { 719 /* 720 * Allocate the copy buffer 721 */ 722 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 723 gfp_mask); 724 int head_copy_len, head_copy_off; 725 726 if (!n) 727 return NULL; 728 729 skb_reserve(n, newheadroom); 730 731 /* Set the tail pointer and length */ 732 skb_put(n, skb->len); 733 734 head_copy_len = skb_headroom(skb); 735 head_copy_off = 0; 736 if (newheadroom <= head_copy_len) 737 head_copy_len = newheadroom; 738 else 739 head_copy_off = newheadroom - head_copy_len; 740 741 /* Copy the linear header and data. */ 742 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, 743 skb->len + head_copy_len)) 744 BUG(); 745 746 copy_skb_header(n, skb); 747 748 return n; 749 } 750 751 /** 752 * skb_pad - zero pad the tail of an skb 753 * @skb: buffer to pad 754 * @pad: space to pad 755 * 756 * Ensure that a buffer is followed by a padding area that is zero 757 * filled. Used by network drivers which may DMA or transfer data 758 * beyond the buffer end onto the wire. 759 * 760 * May return NULL in out of memory cases. 761 */ 762 763 struct sk_buff *skb_pad(struct sk_buff *skb, int pad) 764 { 765 struct sk_buff *nskb; 766 767 /* If the skbuff is non linear tailroom is always zero.. 
*/ 768 if (skb_tailroom(skb) >= pad) { 769 memset(skb->data+skb->len, 0, pad); 770 return skb; 771 } 772 773 nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); 774 kfree_skb(skb); 775 if (nskb) 776 memset(nskb->data+nskb->len, 0, pad); 777 return nskb; 778 } 779 780 /* Trims skb to length len. It can change skb pointers, if "realloc" is 1. 781 * If realloc==0 and trimming is impossible without change of data, 782 * it is BUG(). 783 */ 784 785 int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) 786 { 787 int offset = skb_headlen(skb); 788 int nfrags = skb_shinfo(skb)->nr_frags; 789 int i; 790 791 for (i = 0; i < nfrags; i++) { 792 int end = offset + skb_shinfo(skb)->frags[i].size; 793 if (end > len) { 794 if (skb_cloned(skb)) { 795 if (!realloc) 796 BUG(); 797 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 798 return -ENOMEM; 799 } 800 if (len <= offset) { 801 put_page(skb_shinfo(skb)->frags[i].page); 802 skb_shinfo(skb)->nr_frags--; 803 } else { 804 skb_shinfo(skb)->frags[i].size = len - offset; 805 } 806 } 807 offset = end; 808 } 809 810 if (offset < len) { 811 skb->data_len -= skb->len - len; 812 skb->len = len; 813 } else { 814 if (len <= skb_headlen(skb)) { 815 skb->len = len; 816 skb->data_len = 0; 817 skb->tail = skb->data + len; 818 if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) 819 skb_drop_fraglist(skb); 820 } else { 821 skb->data_len -= skb->len - len; 822 skb->len = len; 823 } 824 } 825 826 return 0; 827 } 828 829 /** 830 * __pskb_pull_tail - advance tail of skb header 831 * @skb: buffer to reallocate 832 * @delta: number of bytes to advance tail 833 * 834 * The function makes a sense only on a fragmented &sk_buff, 835 * it expands header moving its tail forward and copying necessary 836 * data from fragmented part. 837 * 838 * &sk_buff MUST have reference count of 1. 839 * 840 * Returns %NULL (and &sk_buff does not change) if pull failed 841 * or value of new tail of skb in the case of success. 842 * 843 * All the pointers pointing into skb header may change and must be 844 * reloaded after call to this function. 845 */ 846 847 /* Moves tail of skb head forward, copying data from fragmented part, 848 * when it is necessary. 849 * 1. It may fail due to malloc failure. 850 * 2. It may change skb pointers. 851 * 852 * It is pretty complicated. Luckily, it is called only in exceptional cases. 853 */ 854 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) 855 { 856 /* If skb has not enough free space at tail, get new one 857 * plus 128 bytes for future expansions. If we have enough 858 * room at tail, reallocate without expansion only if skb is cloned. 859 */ 860 int i, k, eat = (skb->tail + delta) - skb->end; 861 862 if (eat > 0 || skb_cloned(skb)) { 863 if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, 864 GFP_ATOMIC)) 865 return NULL; 866 } 867 868 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 869 BUG(); 870 871 /* Optimization: no fragments, no reasons to preestimate 872 * size of pulled pages. Superb. 873 */ 874 if (!skb_shinfo(skb)->frag_list) 875 goto pull_pages; 876 877 /* Estimate size of pulled pages. */ 878 eat = delta; 879 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 880 if (skb_shinfo(skb)->frags[i].size >= eat) 881 goto pull_pages; 882 eat -= skb_shinfo(skb)->frags[i].size; 883 } 884 885 /* If we need update frag list, we are in troubles. 
886 * Certainly, it possible to add an offset to skb data, 887 * but taking into account that pulling is expected to 888 * be very rare operation, it is worth to fight against 889 * further bloating skb head and crucify ourselves here instead. 890 * Pure masohism, indeed. 8)8) 891 */ 892 if (eat) { 893 struct sk_buff *list = skb_shinfo(skb)->frag_list; 894 struct sk_buff *clone = NULL; 895 struct sk_buff *insp = NULL; 896 897 do { 898 if (!list) 899 BUG(); 900 901 if (list->len <= eat) { 902 /* Eaten as whole. */ 903 eat -= list->len; 904 list = list->next; 905 insp = list; 906 } else { 907 /* Eaten partially. */ 908 909 if (skb_shared(list)) { 910 /* Sucks! We need to fork list. :-( */ 911 clone = skb_clone(list, GFP_ATOMIC); 912 if (!clone) 913 return NULL; 914 insp = list->next; 915 list = clone; 916 } else { 917 /* This may be pulled without 918 * problems. */ 919 insp = list; 920 } 921 if (!pskb_pull(list, eat)) { 922 if (clone) 923 kfree_skb(clone); 924 return NULL; 925 } 926 break; 927 } 928 } while (eat); 929 930 /* Free pulled out fragments. */ 931 while ((list = skb_shinfo(skb)->frag_list) != insp) { 932 skb_shinfo(skb)->frag_list = list->next; 933 kfree_skb(list); 934 } 935 /* And insert new clone at head. */ 936 if (clone) { 937 clone->next = list; 938 skb_shinfo(skb)->frag_list = clone; 939 } 940 } 941 /* Success! Now we may commit changes to skb data. */ 942 943 pull_pages: 944 eat = delta; 945 k = 0; 946 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 947 if (skb_shinfo(skb)->frags[i].size <= eat) { 948 put_page(skb_shinfo(skb)->frags[i].page); 949 eat -= skb_shinfo(skb)->frags[i].size; 950 } else { 951 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 952 if (eat) { 953 skb_shinfo(skb)->frags[k].page_offset += eat; 954 skb_shinfo(skb)->frags[k].size -= eat; 955 eat = 0; 956 } 957 k++; 958 } 959 } 960 skb_shinfo(skb)->nr_frags = k; 961 962 skb->tail += delta; 963 skb->data_len -= delta; 964 965 return skb->tail; 966 } 967 968 /* Copy some data bits from skb to kernel buffer. */ 969 970 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 971 { 972 int i, copy; 973 int start = skb_headlen(skb); 974 975 if (offset > (int)skb->len - len) 976 goto fault; 977 978 /* Copy header. 
*/ 979 if ((copy = start - offset) > 0) { 980 if (copy > len) 981 copy = len; 982 memcpy(to, skb->data + offset, copy); 983 if ((len -= copy) == 0) 984 return 0; 985 offset += copy; 986 to += copy; 987 } 988 989 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 990 int end; 991 992 BUG_TRAP(start <= offset + len); 993 994 end = start + skb_shinfo(skb)->frags[i].size; 995 if ((copy = end - offset) > 0) { 996 u8 *vaddr; 997 998 if (copy > len) 999 copy = len; 1000 1001 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 1002 memcpy(to, 1003 vaddr + skb_shinfo(skb)->frags[i].page_offset+ 1004 offset - start, copy); 1005 kunmap_skb_frag(vaddr); 1006 1007 if ((len -= copy) == 0) 1008 return 0; 1009 offset += copy; 1010 to += copy; 1011 } 1012 start = end; 1013 } 1014 1015 if (skb_shinfo(skb)->frag_list) { 1016 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1017 1018 for (; list; list = list->next) { 1019 int end; 1020 1021 BUG_TRAP(start <= offset + len); 1022 1023 end = start + list->len; 1024 if ((copy = end - offset) > 0) { 1025 if (copy > len) 1026 copy = len; 1027 if (skb_copy_bits(list, offset - start, 1028 to, copy)) 1029 goto fault; 1030 if ((len -= copy) == 0) 1031 return 0; 1032 offset += copy; 1033 to += copy; 1034 } 1035 start = end; 1036 } 1037 } 1038 if (!len) 1039 return 0; 1040 1041 fault: 1042 return -EFAULT; 1043 } 1044 1045 /** 1046 * skb_store_bits - store bits from kernel buffer to skb 1047 * @skb: destination buffer 1048 * @offset: offset in destination 1049 * @from: source buffer 1050 * @len: number of bytes to copy 1051 * 1052 * Copy the specified number of bytes from the source buffer to the 1053 * destination skb. This function handles all the messy bits of 1054 * traversing fragment lists and such. 1055 */ 1056 1057 int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len) 1058 { 1059 int i, copy; 1060 int start = skb_headlen(skb); 1061 1062 if (offset > (int)skb->len - len) 1063 goto fault; 1064 1065 if ((copy = start - offset) > 0) { 1066 if (copy > len) 1067 copy = len; 1068 memcpy(skb->data + offset, from, copy); 1069 if ((len -= copy) == 0) 1070 return 0; 1071 offset += copy; 1072 from += copy; 1073 } 1074 1075 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1076 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1077 int end; 1078 1079 BUG_TRAP(start <= offset + len); 1080 1081 end = start + frag->size; 1082 if ((copy = end - offset) > 0) { 1083 u8 *vaddr; 1084 1085 if (copy > len) 1086 copy = len; 1087 1088 vaddr = kmap_skb_frag(frag); 1089 memcpy(vaddr + frag->page_offset + offset - start, 1090 from, copy); 1091 kunmap_skb_frag(vaddr); 1092 1093 if ((len -= copy) == 0) 1094 return 0; 1095 offset += copy; 1096 from += copy; 1097 } 1098 start = end; 1099 } 1100 1101 if (skb_shinfo(skb)->frag_list) { 1102 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1103 1104 for (; list; list = list->next) { 1105 int end; 1106 1107 BUG_TRAP(start <= offset + len); 1108 1109 end = start + list->len; 1110 if ((copy = end - offset) > 0) { 1111 if (copy > len) 1112 copy = len; 1113 if (skb_store_bits(list, offset - start, 1114 from, copy)) 1115 goto fault; 1116 if ((len -= copy) == 0) 1117 return 0; 1118 offset += copy; 1119 from += copy; 1120 } 1121 start = end; 1122 } 1123 } 1124 if (!len) 1125 return 0; 1126 1127 fault: 1128 return -EFAULT; 1129 } 1130 1131 EXPORT_SYMBOL(skb_store_bits); 1132 1133 /* Checksum skb data. 
*/ 1134 1135 unsigned int skb_checksum(const struct sk_buff *skb, int offset, 1136 int len, unsigned int csum) 1137 { 1138 int start = skb_headlen(skb); 1139 int i, copy = start - offset; 1140 int pos = 0; 1141 1142 /* Checksum header. */ 1143 if (copy > 0) { 1144 if (copy > len) 1145 copy = len; 1146 csum = csum_partial(skb->data + offset, copy, csum); 1147 if ((len -= copy) == 0) 1148 return csum; 1149 offset += copy; 1150 pos = copy; 1151 } 1152 1153 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1154 int end; 1155 1156 BUG_TRAP(start <= offset + len); 1157 1158 end = start + skb_shinfo(skb)->frags[i].size; 1159 if ((copy = end - offset) > 0) { 1160 unsigned int csum2; 1161 u8 *vaddr; 1162 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1163 1164 if (copy > len) 1165 copy = len; 1166 vaddr = kmap_skb_frag(frag); 1167 csum2 = csum_partial(vaddr + frag->page_offset + 1168 offset - start, copy, 0); 1169 kunmap_skb_frag(vaddr); 1170 csum = csum_block_add(csum, csum2, pos); 1171 if (!(len -= copy)) 1172 return csum; 1173 offset += copy; 1174 pos += copy; 1175 } 1176 start = end; 1177 } 1178 1179 if (skb_shinfo(skb)->frag_list) { 1180 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1181 1182 for (; list; list = list->next) { 1183 int end; 1184 1185 BUG_TRAP(start <= offset + len); 1186 1187 end = start + list->len; 1188 if ((copy = end - offset) > 0) { 1189 unsigned int csum2; 1190 if (copy > len) 1191 copy = len; 1192 csum2 = skb_checksum(list, offset - start, 1193 copy, 0); 1194 csum = csum_block_add(csum, csum2, pos); 1195 if ((len -= copy) == 0) 1196 return csum; 1197 offset += copy; 1198 pos += copy; 1199 } 1200 start = end; 1201 } 1202 } 1203 if (len) 1204 BUG(); 1205 1206 return csum; 1207 } 1208 1209 /* Both of above in one bottle. */ 1210 1211 unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, 1212 u8 *to, int len, unsigned int csum) 1213 { 1214 int start = skb_headlen(skb); 1215 int i, copy = start - offset; 1216 int pos = 0; 1217 1218 /* Copy header. 
*/ 1219 if (copy > 0) { 1220 if (copy > len) 1221 copy = len; 1222 csum = csum_partial_copy_nocheck(skb->data + offset, to, 1223 copy, csum); 1224 if ((len -= copy) == 0) 1225 return csum; 1226 offset += copy; 1227 to += copy; 1228 pos = copy; 1229 } 1230 1231 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1232 int end; 1233 1234 BUG_TRAP(start <= offset + len); 1235 1236 end = start + skb_shinfo(skb)->frags[i].size; 1237 if ((copy = end - offset) > 0) { 1238 unsigned int csum2; 1239 u8 *vaddr; 1240 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1241 1242 if (copy > len) 1243 copy = len; 1244 vaddr = kmap_skb_frag(frag); 1245 csum2 = csum_partial_copy_nocheck(vaddr + 1246 frag->page_offset + 1247 offset - start, to, 1248 copy, 0); 1249 kunmap_skb_frag(vaddr); 1250 csum = csum_block_add(csum, csum2, pos); 1251 if (!(len -= copy)) 1252 return csum; 1253 offset += copy; 1254 to += copy; 1255 pos += copy; 1256 } 1257 start = end; 1258 } 1259 1260 if (skb_shinfo(skb)->frag_list) { 1261 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1262 1263 for (; list; list = list->next) { 1264 unsigned int csum2; 1265 int end; 1266 1267 BUG_TRAP(start <= offset + len); 1268 1269 end = start + list->len; 1270 if ((copy = end - offset) > 0) { 1271 if (copy > len) 1272 copy = len; 1273 csum2 = skb_copy_and_csum_bits(list, 1274 offset - start, 1275 to, copy, 0); 1276 csum = csum_block_add(csum, csum2, pos); 1277 if ((len -= copy) == 0) 1278 return csum; 1279 offset += copy; 1280 to += copy; 1281 pos += copy; 1282 } 1283 start = end; 1284 } 1285 } 1286 if (len) 1287 BUG(); 1288 return csum; 1289 } 1290 1291 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 1292 { 1293 unsigned int csum; 1294 long csstart; 1295 1296 if (skb->ip_summed == CHECKSUM_HW) 1297 csstart = skb->h.raw - skb->data; 1298 else 1299 csstart = skb_headlen(skb); 1300 1301 if (csstart > skb_headlen(skb)) 1302 BUG(); 1303 1304 memcpy(to, skb->data, csstart); 1305 1306 csum = 0; 1307 if (csstart != skb->len) 1308 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, 1309 skb->len - csstart, 0); 1310 1311 if (skb->ip_summed == CHECKSUM_HW) { 1312 long csstuff = csstart + skb->csum; 1313 1314 *((unsigned short *)(to + csstuff)) = csum_fold(csum); 1315 } 1316 } 1317 1318 /** 1319 * skb_dequeue - remove from the head of the queue 1320 * @list: list to dequeue from 1321 * 1322 * Remove the head of the list. The list lock is taken so the function 1323 * may be used safely with other locking list functions. The head item is 1324 * returned or %NULL if the list is empty. 1325 */ 1326 1327 struct sk_buff *skb_dequeue(struct sk_buff_head *list) 1328 { 1329 unsigned long flags; 1330 struct sk_buff *result; 1331 1332 spin_lock_irqsave(&list->lock, flags); 1333 result = __skb_dequeue(list); 1334 spin_unlock_irqrestore(&list->lock, flags); 1335 return result; 1336 } 1337 1338 /** 1339 * skb_dequeue_tail - remove from the tail of the queue 1340 * @list: list to dequeue from 1341 * 1342 * Remove the tail of the list. The list lock is taken so the function 1343 * may be used safely with other locking list functions. The tail item is 1344 * returned or %NULL if the list is empty. 
1345 */ 1346 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 1347 { 1348 unsigned long flags; 1349 struct sk_buff *result; 1350 1351 spin_lock_irqsave(&list->lock, flags); 1352 result = __skb_dequeue_tail(list); 1353 spin_unlock_irqrestore(&list->lock, flags); 1354 return result; 1355 } 1356 1357 /** 1358 * skb_queue_purge - empty a list 1359 * @list: list to empty 1360 * 1361 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1362 * the list and one reference dropped. This function takes the list 1363 * lock and is atomic with respect to other list locking functions. 1364 */ 1365 void skb_queue_purge(struct sk_buff_head *list) 1366 { 1367 struct sk_buff *skb; 1368 while ((skb = skb_dequeue(list)) != NULL) 1369 kfree_skb(skb); 1370 } 1371 1372 /** 1373 * skb_queue_head - queue a buffer at the list head 1374 * @list: list to use 1375 * @newsk: buffer to queue 1376 * 1377 * Queue a buffer at the start of the list. This function takes the 1378 * list lock and can be used safely with other locking &sk_buff functions 1379 * safely. 1380 * 1381 * A buffer cannot be placed on two lists at the same time. 1382 */ 1383 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 1384 { 1385 unsigned long flags; 1386 1387 spin_lock_irqsave(&list->lock, flags); 1388 __skb_queue_head(list, newsk); 1389 spin_unlock_irqrestore(&list->lock, flags); 1390 } 1391 1392 /** 1393 * skb_queue_tail - queue a buffer at the list tail 1394 * @list: list to use 1395 * @newsk: buffer to queue 1396 * 1397 * Queue a buffer at the tail of the list. This function takes the 1398 * list lock and can be used safely with other locking &sk_buff functions 1399 * safely. 1400 * 1401 * A buffer cannot be placed on two lists at the same time. 1402 */ 1403 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 1404 { 1405 unsigned long flags; 1406 1407 spin_lock_irqsave(&list->lock, flags); 1408 __skb_queue_tail(list, newsk); 1409 spin_unlock_irqrestore(&list->lock, flags); 1410 } 1411 1412 /** 1413 * skb_unlink - remove a buffer from a list 1414 * @skb: buffer to remove 1415 * @list: list to use 1416 * 1417 * Remove a packet from a list. The list locks are taken and this 1418 * function is atomic with respect to other list locked calls 1419 * 1420 * You must know what list the SKB is on. 1421 */ 1422 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 1423 { 1424 unsigned long flags; 1425 1426 spin_lock_irqsave(&list->lock, flags); 1427 __skb_unlink(skb, list); 1428 spin_unlock_irqrestore(&list->lock, flags); 1429 } 1430 1431 /** 1432 * skb_append - append a buffer 1433 * @old: buffer to insert after 1434 * @newsk: buffer to insert 1435 * @list: list to use 1436 * 1437 * Place a packet after a given packet in a list. The list locks are taken 1438 * and this function is atomic with respect to other list locked calls. 1439 * A buffer cannot be placed on two lists at the same time. 1440 */ 1441 void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) 1442 { 1443 unsigned long flags; 1444 1445 spin_lock_irqsave(&list->lock, flags); 1446 __skb_append(old, newsk, list); 1447 spin_unlock_irqrestore(&list->lock, flags); 1448 } 1449 1450 1451 /** 1452 * skb_insert - insert a buffer 1453 * @old: buffer to insert before 1454 * @newsk: buffer to insert 1455 * @list: list to use 1456 * 1457 * Place a packet before a given packet in a list. The list locks are 1458 * taken and this function is atomic with respect to other list locked 1459 * calls. 
1460 * 1461 * A buffer cannot be placed on two lists at the same time. 1462 */ 1463 void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) 1464 { 1465 unsigned long flags; 1466 1467 spin_lock_irqsave(&list->lock, flags); 1468 __skb_insert(newsk, old->prev, old, list); 1469 spin_unlock_irqrestore(&list->lock, flags); 1470 } 1471 1472 #if 0 1473 /* 1474 * Tune the memory allocator for a new MTU size. 1475 */ 1476 void skb_add_mtu(int mtu) 1477 { 1478 /* Must match allocation in alloc_skb */ 1479 mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); 1480 1481 kmem_add_cache_size(mtu); 1482 } 1483 #endif 1484 1485 static inline void skb_split_inside_header(struct sk_buff *skb, 1486 struct sk_buff* skb1, 1487 const u32 len, const int pos) 1488 { 1489 int i; 1490 1491 memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len); 1492 1493 /* And move data appendix as is. */ 1494 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1495 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; 1496 1497 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; 1498 skb_shinfo(skb)->nr_frags = 0; 1499 skb1->data_len = skb->data_len; 1500 skb1->len += skb1->data_len; 1501 skb->data_len = 0; 1502 skb->len = len; 1503 skb->tail = skb->data + len; 1504 } 1505 1506 static inline void skb_split_no_header(struct sk_buff *skb, 1507 struct sk_buff* skb1, 1508 const u32 len, int pos) 1509 { 1510 int i, k = 0; 1511 const int nfrags = skb_shinfo(skb)->nr_frags; 1512 1513 skb_shinfo(skb)->nr_frags = 0; 1514 skb1->len = skb1->data_len = skb->len - len; 1515 skb->len = len; 1516 skb->data_len = len - pos; 1517 1518 for (i = 0; i < nfrags; i++) { 1519 int size = skb_shinfo(skb)->frags[i].size; 1520 1521 if (pos + size > len) { 1522 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; 1523 1524 if (pos < len) { 1525 /* Split frag. 1526 * We have two variants in this case: 1527 * 1. Move all the frag to the second 1528 * part, if it is possible. F.e. 1529 * this approach is mandatory for TUX, 1530 * where splitting is expensive. 1531 * 2. Split is accurately. We make this. 1532 */ 1533 get_page(skb_shinfo(skb)->frags[i].page); 1534 skb_shinfo(skb1)->frags[0].page_offset += len - pos; 1535 skb_shinfo(skb1)->frags[0].size -= len - pos; 1536 skb_shinfo(skb)->frags[i].size = len - pos; 1537 skb_shinfo(skb)->nr_frags++; 1538 } 1539 k++; 1540 } else 1541 skb_shinfo(skb)->nr_frags++; 1542 pos += size; 1543 } 1544 skb_shinfo(skb1)->nr_frags = k; 1545 } 1546 1547 /** 1548 * skb_split - Split fragmented skb to two parts at length len. 1549 * @skb: the buffer to split 1550 * @skb1: the buffer to receive the second part 1551 * @len: new length for skb 1552 */ 1553 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) 1554 { 1555 int pos = skb_headlen(skb); 1556 1557 if (len < pos) /* Split line is inside header. */ 1558 skb_split_inside_header(skb, skb1, len, pos); 1559 else /* Second chunk has no header, nothing to copy. */ 1560 skb_split_no_header(skb, skb1, len, pos); 1561 } 1562 1563 /** 1564 * skb_prepare_seq_read - Prepare a sequential read of skb data 1565 * @skb: the buffer to read 1566 * @from: lower offset of data to be read 1567 * @to: upper offset of data to be read 1568 * @st: state variable 1569 * 1570 * Initializes the specified state variable. Must be called before 1571 * invoking skb_seq_read() for the first time. 
 */
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
			  unsigned int to, struct skb_seq_state *st)
{
	st->lower_offset = from;
	st->upper_offset = to;
	st->root_skb = st->cur_skb = skb;
	st->frag_idx = st->stepped_offset = 0;
	st->frag_data = NULL;
}

/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at &consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to &data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. &consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note: The size of each block of data returned can be arbitrary,
 *       this limitation is the cost for zerocopy sequential
 *       reads of potentially non linear data.
 *
 * Note: Fragment lists within fragments are not implemented
 *       at the moment, state->root_skb could be replaced with
 *       a stack for this purpose.
 */
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st)
{
	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
	skb_frag_t *frag;

	if (unlikely(abs_offset >= st->upper_offset))
		return 0;

next_skb:
	block_limit = skb_headlen(st->cur_skb);

	if (abs_offset < block_limit) {
		*data = st->cur_skb->data + abs_offset;
		return block_limit - abs_offset;
	}

	if (st->frag_idx == 0 && !st->frag_data)
		st->stepped_offset += skb_headlen(st->cur_skb);

	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
		block_limit = frag->size + st->stepped_offset;

		if (abs_offset < block_limit) {
			if (!st->frag_data)
				st->frag_data = kmap_skb_frag(frag);

			*data = (u8 *) st->frag_data + frag->page_offset +
				(abs_offset - st->stepped_offset);

			return block_limit - abs_offset;
		}

		if (st->frag_data) {
			kunmap_skb_frag(st->frag_data);
			st->frag_data = NULL;
		}

		st->frag_idx++;
		st->stepped_offset += frag->size;
	}

	if (st->cur_skb->next) {
		st->cur_skb = st->cur_skb->next;
		st->frag_idx = 0;
		goto next_skb;
	} else if (st->root_skb == st->cur_skb &&
		   skb_shinfo(st->root_skb)->frag_list) {
		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
		goto next_skb;
	}

	return 0;
}
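
/*
 * Illustrative sketch (not part of the original file): the intended calling
 * pattern for the sequential reader is prepare, read in a loop, then abort
 * if the walk is abandoned early.  process_block() stands in for whatever
 * the caller does with each block; it is not a real function.
 *
 *	unsigned int consumed = 0, len;
 *	const u8 *data;
 *	struct skb_seq_state st;
 *
 *	skb_prepare_seq_read(skb, 0, skb->len, &st);
 *	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
 *		process_block(data, len);
 *		consumed += len;
 *	}
 *
 * If the loop were left before skb_seq_read() had returned 0, the caller
 * would have to call skb_abort_seq_read(&st) to drop any fragment mapping
 * still held by the state.
 */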

/**
 * skb_abort_seq_read - Abort a sequential read of skb data
 * @st: state variable
 *
 * Must be called if skb_seq_read() was not called until it
 * returned 0.
 */
void skb_abort_seq_read(struct skb_seq_state *st)
{
	if (st->frag_data)
		kunmap_skb_frag(st->frag_data);
}

#define TS_SKB_CB(state)	((struct skb_seq_state *) &((state)->cb))

static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
					  struct ts_config *conf,
					  struct ts_state *state)
{
	return skb_seq_read(offset, text, TS_SKB_CB(state));
}

static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
{
	skb_abort_seq_read(TS_SKB_CB(state));
}

/**
 * skb_find_text - Find a text pattern in skb data
 * @skb: the buffer to look in
 * @from: search offset
 * @to: search limit
 * @config: textsearch configuration
 * @state: uninitialized textsearch state variable
 *
 * Finds a pattern in the skb data according to the specified
 * textsearch configuration. Use textsearch_next() to retrieve
 * subsequent occurrences of the pattern. Returns the offset
 * to the first occurrence or UINT_MAX if no match was found.
 */
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
			   unsigned int to, struct ts_config *config,
			   struct ts_state *state)
{
	config->get_next_block = skb_ts_get_next_block;
	config->finish = skb_ts_finish;

	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));

	return textsearch_find(config, state);
}
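
/*
 * Illustrative sketch (not part of the original file): locating a pattern in
 * a possibly non-linear skb via the textsearch infrastructure.  The choice
 * of the "kmp" algorithm, the "HTTP" pattern and the search window are
 * arbitrary examples.
 *
 *	struct ts_config *conf;
 *	struct ts_state state;
 *	unsigned int pos;
 *
 *	conf = textsearch_prepare("kmp", "HTTP", 4, GFP_ATOMIC, TS_AUTOLOAD);
 *	if (IS_ERR(conf))
 *		return PTR_ERR(conf);
 *	pos = skb_find_text(skb, 0, skb->len, conf, &state);
 *	textsearch_destroy(conf);
 *
 * A result of UINT_MAX means the pattern was not found; any other value is
 * the offset of the first match relative to @from.
 */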

/**
 *	skb_append_datato_frags: - append the user data to a skb
 *	@sk: sock structure
 *	@skb: skb structure to be appended with user data.
 *	@getfrag: call back function to be used for getting the user data
 *	@from: pointer to user message iov
 *	@length: length of the iov message
 *
 *	Description: This procedure appends the user data to the fragment part
 *	of the skb. If any page allocation fails, it returns -ENOMEM.
 */
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
			int (*getfrag)(void *from, char *to, int offset,
					int len, int odd, struct sk_buff *skb),
			void *from, int length)
{
	int frg_cnt = 0;
	skb_frag_t *frag = NULL;
	struct page *page = NULL;
	int copy, left;
	int offset = 0;
	int ret;

	do {
		/* Return error if we don't have space for new frag */
		frg_cnt = skb_shinfo(skb)->nr_frags;
		if (frg_cnt >= MAX_SKB_FRAGS)
			return -EFAULT;

		/* allocate a new page for next frag */
		page = alloc_pages(sk->sk_allocation, 0);

		/* If alloc_page fails just return failure and caller will
		 * free previous allocated pages by doing kfree_skb()
		 */
		if (page == NULL)
			return -ENOMEM;

		/* initialize the next frag */
		sk->sk_sndmsg_page = page;
		sk->sk_sndmsg_off = 0;
		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
		skb->truesize += PAGE_SIZE;
		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);

		/* get the new initialized frag */
		frg_cnt = skb_shinfo(skb)->nr_frags;
		frag = &skb_shinfo(skb)->frags[frg_cnt - 1];

		/* copy the user data to page */
		left = PAGE_SIZE - frag->page_offset;
		copy = (length > left) ? left : length;

		ret = getfrag(from, (page_address(frag->page) +
			    frag->page_offset + frag->size),
			    offset, copy, 0, skb);
		if (ret < 0)
			return -EFAULT;

		/* copy was successful so update the size parameters */
		sk->sk_sndmsg_off += copy;
		frag->size += copy;
		skb->len += copy;
		skb->data_len += copy;
		offset += copy;
		length -= copy;

	} while (length > 0);

	return 0;
}

void __init skb_init(void)
{
	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      NULL, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						(2*sizeof(struct sk_buff)) +
						sizeof(atomic_t),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL, NULL);
	if (!skbuff_fclone_cache)
		panic("cannot create skbuff cache");
}

EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
EXPORT_SYMBOL(skb_clone);
EXPORT_SYMBOL(skb_clone_fraglist);
EXPORT_SYMBOL(skb_copy);
EXPORT_SYMBOL(skb_copy_and_csum_bits);
EXPORT_SYMBOL(skb_copy_and_csum_dev);
EXPORT_SYMBOL(skb_copy_bits);
EXPORT_SYMBOL(skb_copy_expand);
EXPORT_SYMBOL(skb_over_panic);
EXPORT_SYMBOL(skb_pad);
EXPORT_SYMBOL(skb_realloc_headroom);
EXPORT_SYMBOL(skb_under_panic);
EXPORT_SYMBOL(skb_dequeue);
EXPORT_SYMBOL(skb_dequeue_tail);
EXPORT_SYMBOL(skb_insert);
EXPORT_SYMBOL(skb_queue_purge);
EXPORT_SYMBOL(skb_queue_head);
EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
EXPORT_SYMBOL(skb_prepare_seq_read);
EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
EXPORT_SYMBOL(skb_append_datato_frags);
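
/*
 * Illustrative sketch (not part of the original file): the common receive-path
 * allocation pattern built on the allocators above.  NET_IP_ALIGN, 'len' and
 * the 'frame' source buffer are arbitrary examples.
 *
 *	struct sk_buff *skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC);
 *
 *	if (!skb)
 *		return NULL;
 *	skb_reserve(skb, NET_IP_ALIGN);
 *	memcpy(skb_put(skb, len), frame, len);
 *
 * skb_reserve() only creates headroom (skb->len stays 0), while skb_put()
 * advances the tail by 'len' and triggers skb_over_panic() if the tail would
 * run past skb->end, which is why the allocation size must cover everything
 * that will be appended.
 */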