1 /* 2 * Routines having to do with the 'struct sk_buff' memory handlers. 3 * 4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 5 * Florian La Roche <rzsfl@rz.uni-sb.de> 6 * 7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ 8 * 9 * Fixes: 10 * Alan Cox : Fixed the worst of the load 11 * balancer bugs. 12 * Dave Platt : Interrupt stacking fix. 13 * Richard Kooijman : Timestamp fixes. 14 * Alan Cox : Changed buffer format. 15 * Alan Cox : destructor hook for AF_UNIX etc. 16 * Linus Torvalds : Better skb_clone. 17 * Alan Cox : Added skb_copy. 18 * Alan Cox : Added all the changed routines Linus 19 * only put in the headers 20 * Ray VanTassle : Fixed --skb->lock in free 21 * Alan Cox : skb_copy copy arp field 22 * Andi Kleen : slabified it. 23 * Robert Olsson : Removed skb_head_pool 24 * 25 * NOTE: 26 * The __skb_ routines should be called with interrupts 27 * disabled, or you better be *real* sure that the operation is atomic 28 * with respect to whatever list is being frobbed (e.g. via lock_sock() 29 * or via disabling bottom half handlers, etc). 30 * 31 * This program is free software; you can redistribute it and/or 32 * modify it under the terms of the GNU General Public License 33 * as published by the Free Software Foundation; either version 34 * 2 of the License, or (at your option) any later version. 35 */ 36 37 /* 38 * The functions in this file will not compile correctly with gcc 2.4.x 39 */ 40 41 #include <linux/module.h> 42 #include <linux/types.h> 43 #include <linux/kernel.h> 44 #include <linux/sched.h> 45 #include <linux/mm.h> 46 #include <linux/interrupt.h> 47 #include <linux/in.h> 48 #include <linux/inet.h> 49 #include <linux/slab.h> 50 #include <linux/netdevice.h> 51 #ifdef CONFIG_NET_CLS_ACT 52 #include <net/pkt_sched.h> 53 #endif 54 #include <linux/string.h> 55 #include <linux/skbuff.h> 56 #include <linux/cache.h> 57 #include <linux/rtnetlink.h> 58 #include <linux/init.h> 59 #include <linux/highmem.h> 60 61 #include <net/protocol.h> 62 #include <net/dst.h> 63 #include <net/sock.h> 64 #include <net/checksum.h> 65 #include <net/xfrm.h> 66 67 #include <asm/uaccess.h> 68 #include <asm/system.h> 69 70 static kmem_cache_t *skbuff_head_cache __read_mostly; 71 static kmem_cache_t *skbuff_fclone_cache __read_mostly; 72 73 /* 74 * lockdep: lock class key used by skb_queue_head_init(): 75 */ 76 struct lock_class_key skb_queue_lock_key; 77 78 EXPORT_SYMBOL(skb_queue_lock_key); 79 80 /* 81 * Keep out-of-line to prevent kernel bloat. 82 * __builtin_return_address is not used because it is not always 83 * reliable. 84 */ 85 86 /** 87 * skb_over_panic - private function 88 * @skb: buffer 89 * @sz: size 90 * @here: address 91 * 92 * Out of line support code for skb_put(). Not user callable. 93 */ 94 void skb_over_panic(struct sk_buff *skb, int sz, void *here) 95 { 96 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 97 "data:%p tail:%p end:%p dev:%s\n", 98 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 99 skb->dev ? skb->dev->name : "<NULL>"); 100 BUG(); 101 } 102 103 /** 104 * skb_under_panic - private function 105 * @skb: buffer 106 * @sz: size 107 * @here: address 108 * 109 * Out of line support code for skb_push(). Not user callable. 110 */ 111 112 void skb_under_panic(struct sk_buff *skb, int sz, void *here) 113 { 114 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 115 "data:%p tail:%p end:%p dev:%s\n", 116 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 117 skb->dev ? 
skb->dev->name : "<NULL>"); 118 BUG(); 119 } 120 121 void skb_truesize_bug(struct sk_buff *skb) 122 { 123 printk(KERN_ERR "SKB BUG: Invalid truesize (%u) " 124 "len=%u, sizeof(sk_buff)=%Zd\n", 125 skb->truesize, skb->len, sizeof(struct sk_buff)); 126 } 127 EXPORT_SYMBOL(skb_truesize_bug); 128 129 /* Allocate a new skbuff. We do this ourselves so we can fill in a few 130 * 'private' fields and also do memory statistics to find all the 131 * [BEEP] leaks. 132 * 133 */ 134 135 /** 136 * __alloc_skb - allocate a network buffer 137 * @size: size to allocate 138 * @gfp_mask: allocation mask 139 * @fclone: allocate from fclone cache instead of head cache 140 * and allocate a cloned (child) skb 141 * 142 * Allocate a new &sk_buff. The returned buffer has no headroom and a 143 * tail room of size bytes. The object has a reference count of one. 144 * The return is the buffer. On a failure the return is %NULL. 145 * 146 * Buffers may only be allocated from interrupts using a @gfp_mask of 147 * %GFP_ATOMIC. 148 */ 149 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, 150 int fclone) 151 { 152 kmem_cache_t *cache; 153 struct skb_shared_info *shinfo; 154 struct sk_buff *skb; 155 u8 *data; 156 157 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; 158 159 /* Get the HEAD */ 160 skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); 161 if (!skb) 162 goto out; 163 164 /* Get the DATA. Size must match skb_add_mtu(). */ 165 size = SKB_DATA_ALIGN(size); 166 data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 167 if (!data) 168 goto nodata; 169 170 memset(skb, 0, offsetof(struct sk_buff, truesize)); 171 skb->truesize = size + sizeof(struct sk_buff); 172 atomic_set(&skb->users, 1); 173 skb->head = data; 174 skb->data = data; 175 skb->tail = data; 176 skb->end = data + size; 177 /* make sure we initialize shinfo sequentially */ 178 shinfo = skb_shinfo(skb); 179 atomic_set(&shinfo->dataref, 1); 180 shinfo->nr_frags = 0; 181 shinfo->gso_size = 0; 182 shinfo->gso_segs = 0; 183 shinfo->gso_type = 0; 184 shinfo->ip6_frag_id = 0; 185 shinfo->frag_list = NULL; 186 187 if (fclone) { 188 struct sk_buff *child = skb + 1; 189 atomic_t *fclone_ref = (atomic_t *) (child + 1); 190 191 skb->fclone = SKB_FCLONE_ORIG; 192 atomic_set(fclone_ref, 1); 193 194 child->fclone = SKB_FCLONE_UNAVAILABLE; 195 } 196 out: 197 return skb; 198 nodata: 199 kmem_cache_free(cache, skb); 200 skb = NULL; 201 goto out; 202 } 203 204 /** 205 * alloc_skb_from_cache - allocate a network buffer 206 * @cp: kmem_cache from which to allocate the data area 207 * (object size must be big enough for @size bytes + skb overheads) 208 * @size: size to allocate 209 * @gfp_mask: allocation mask 210 * 211 * Allocate a new &sk_buff. The returned buffer has no headroom and 212 * tail room of size bytes. The object has a reference count of one. 213 * The return is the buffer. On a failure the return is %NULL. 214 * 215 * Buffers may only be allocated from interrupts using a @gfp_mask of 216 * %GFP_ATOMIC. 217 */ 218 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 219 unsigned int size, 220 gfp_t gfp_mask) 221 { 222 struct sk_buff *skb; 223 u8 *data; 224 225 /* Get the HEAD */ 226 skb = kmem_cache_alloc(skbuff_head_cache, 227 gfp_mask & ~__GFP_DMA); 228 if (!skb) 229 goto out; 230 231 /* Get the DATA. 
*/ 232 size = SKB_DATA_ALIGN(size); 233 data = kmem_cache_alloc(cp, gfp_mask); 234 if (!data) 235 goto nodata; 236 237 memset(skb, 0, offsetof(struct sk_buff, truesize)); 238 skb->truesize = size + sizeof(struct sk_buff); 239 atomic_set(&skb->users, 1); 240 skb->head = data; 241 skb->data = data; 242 skb->tail = data; 243 skb->end = data + size; 244 245 atomic_set(&(skb_shinfo(skb)->dataref), 1); 246 skb_shinfo(skb)->nr_frags = 0; 247 skb_shinfo(skb)->gso_size = 0; 248 skb_shinfo(skb)->gso_segs = 0; 249 skb_shinfo(skb)->gso_type = 0; 250 skb_shinfo(skb)->frag_list = NULL; 251 out: 252 return skb; 253 nodata: 254 kmem_cache_free(skbuff_head_cache, skb); 255 skb = NULL; 256 goto out; 257 } 258 259 260 static void skb_drop_fraglist(struct sk_buff *skb) 261 { 262 struct sk_buff *list = skb_shinfo(skb)->frag_list; 263 264 skb_shinfo(skb)->frag_list = NULL; 265 266 do { 267 struct sk_buff *this = list; 268 list = list->next; 269 kfree_skb(this); 270 } while (list); 271 } 272 273 static void skb_clone_fraglist(struct sk_buff *skb) 274 { 275 struct sk_buff *list; 276 277 for (list = skb_shinfo(skb)->frag_list; list; list = list->next) 278 skb_get(list); 279 } 280 281 static void skb_release_data(struct sk_buff *skb) 282 { 283 if (!skb->cloned || 284 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 285 &skb_shinfo(skb)->dataref)) { 286 if (skb_shinfo(skb)->nr_frags) { 287 int i; 288 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 289 put_page(skb_shinfo(skb)->frags[i].page); 290 } 291 292 if (skb_shinfo(skb)->frag_list) 293 skb_drop_fraglist(skb); 294 295 kfree(skb->head); 296 } 297 } 298 299 /* 300 * Free an skbuff by memory without cleaning the state. 301 */ 302 void kfree_skbmem(struct sk_buff *skb) 303 { 304 struct sk_buff *other; 305 atomic_t *fclone_ref; 306 307 skb_release_data(skb); 308 switch (skb->fclone) { 309 case SKB_FCLONE_UNAVAILABLE: 310 kmem_cache_free(skbuff_head_cache, skb); 311 break; 312 313 case SKB_FCLONE_ORIG: 314 fclone_ref = (atomic_t *) (skb + 2); 315 if (atomic_dec_and_test(fclone_ref)) 316 kmem_cache_free(skbuff_fclone_cache, skb); 317 break; 318 319 case SKB_FCLONE_CLONE: 320 fclone_ref = (atomic_t *) (skb + 1); 321 other = skb - 1; 322 323 /* The clone portion is available for 324 * fast-cloning again. 325 */ 326 skb->fclone = SKB_FCLONE_UNAVAILABLE; 327 328 if (atomic_dec_and_test(fclone_ref)) 329 kmem_cache_free(skbuff_fclone_cache, other); 330 break; 331 }; 332 } 333 334 /** 335 * __kfree_skb - private function 336 * @skb: buffer 337 * 338 * Free an sk_buff. Release anything attached to the buffer. 339 * Clean the state. This is an internal helper function. Users should 340 * always call kfree_skb 341 */ 342 343 void __kfree_skb(struct sk_buff *skb) 344 { 345 dst_release(skb->dst); 346 #ifdef CONFIG_XFRM 347 secpath_put(skb->sp); 348 #endif 349 if (skb->destructor) { 350 WARN_ON(in_irq()); 351 skb->destructor(skb); 352 } 353 #ifdef CONFIG_NETFILTER 354 nf_conntrack_put(skb->nfct); 355 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 356 nf_conntrack_put_reasm(skb->nfct_reasm); 357 #endif 358 #ifdef CONFIG_BRIDGE_NETFILTER 359 nf_bridge_put(skb->nf_bridge); 360 #endif 361 #endif 362 /* XXX: IS this still necessary? 
- JHS */ 363 #ifdef CONFIG_NET_SCHED 364 skb->tc_index = 0; 365 #ifdef CONFIG_NET_CLS_ACT 366 skb->tc_verd = 0; 367 #endif 368 #endif 369 370 kfree_skbmem(skb); 371 } 372 373 /** 374 * kfree_skb - free an sk_buff 375 * @skb: buffer to free 376 * 377 * Drop a reference to the buffer and free it if the usage count has 378 * hit zero. 379 */ 380 void kfree_skb(struct sk_buff *skb) 381 { 382 if (unlikely(!skb)) 383 return; 384 if (likely(atomic_read(&skb->users) == 1)) 385 smp_rmb(); 386 else if (likely(!atomic_dec_and_test(&skb->users))) 387 return; 388 __kfree_skb(skb); 389 } 390 391 /** 392 * skb_clone - duplicate an sk_buff 393 * @skb: buffer to clone 394 * @gfp_mask: allocation priority 395 * 396 * Duplicate an &sk_buff. The new one is not owned by a socket. Both 397 * copies share the same packet data but not structure. The new 398 * buffer has a reference count of 1. If the allocation fails the 399 * function returns %NULL otherwise the new buffer is returned. 400 * 401 * If this function is called from an interrupt gfp_mask() must be 402 * %GFP_ATOMIC. 403 */ 404 405 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) 406 { 407 struct sk_buff *n; 408 409 n = skb + 1; 410 if (skb->fclone == SKB_FCLONE_ORIG && 411 n->fclone == SKB_FCLONE_UNAVAILABLE) { 412 atomic_t *fclone_ref = (atomic_t *) (n + 1); 413 n->fclone = SKB_FCLONE_CLONE; 414 atomic_inc(fclone_ref); 415 } else { 416 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 417 if (!n) 418 return NULL; 419 n->fclone = SKB_FCLONE_UNAVAILABLE; 420 } 421 422 #define C(x) n->x = skb->x 423 424 n->next = n->prev = NULL; 425 n->sk = NULL; 426 C(tstamp); 427 C(dev); 428 C(h); 429 C(nh); 430 C(mac); 431 C(dst); 432 dst_clone(skb->dst); 433 C(sp); 434 #ifdef CONFIG_INET 435 secpath_get(skb->sp); 436 #endif 437 memcpy(n->cb, skb->cb, sizeof(skb->cb)); 438 C(len); 439 C(data_len); 440 C(csum); 441 C(local_df); 442 n->cloned = 1; 443 n->nohdr = 0; 444 C(pkt_type); 445 C(ip_summed); 446 C(priority); 447 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 448 C(ipvs_property); 449 #endif 450 C(protocol); 451 n->destructor = NULL; 452 #ifdef CONFIG_NETFILTER 453 C(nfmark); 454 C(nfct); 455 nf_conntrack_get(skb->nfct); 456 C(nfctinfo); 457 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 458 C(nfct_reasm); 459 nf_conntrack_get_reasm(skb->nfct_reasm); 460 #endif 461 #ifdef CONFIG_BRIDGE_NETFILTER 462 C(nf_bridge); 463 nf_bridge_get(skb->nf_bridge); 464 #endif 465 #endif /*CONFIG_NETFILTER*/ 466 #ifdef CONFIG_NET_SCHED 467 C(tc_index); 468 #ifdef CONFIG_NET_CLS_ACT 469 n->tc_verd = SET_TC_VERD(skb->tc_verd,0); 470 n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); 471 n->tc_verd = CLR_TC_MUNGED(n->tc_verd); 472 C(input_dev); 473 #endif 474 skb_copy_secmark(n, skb); 475 #endif 476 C(truesize); 477 atomic_set(&n->users, 1); 478 C(head); 479 C(data); 480 C(tail); 481 C(end); 482 483 atomic_inc(&(skb_shinfo(skb)->dataref)); 484 skb->cloned = 1; 485 486 return n; 487 } 488 489 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 490 { 491 /* 492 * Shift between the two data areas in bytes 493 */ 494 unsigned long offset = new->data - old->data; 495 496 new->sk = NULL; 497 new->dev = old->dev; 498 new->priority = old->priority; 499 new->protocol = old->protocol; 500 new->dst = dst_clone(old->dst); 501 #ifdef CONFIG_INET 502 new->sp = secpath_get(old->sp); 503 #endif 504 new->h.raw = old->h.raw + offset; 505 new->nh.raw = old->nh.raw + offset; 506 new->mac.raw = old->mac.raw + offset; 507 memcpy(new->cb, 
old->cb, sizeof(old->cb)); 508 new->local_df = old->local_df; 509 new->fclone = SKB_FCLONE_UNAVAILABLE; 510 new->pkt_type = old->pkt_type; 511 new->tstamp = old->tstamp; 512 new->destructor = NULL; 513 #ifdef CONFIG_NETFILTER 514 new->nfmark = old->nfmark; 515 new->nfct = old->nfct; 516 nf_conntrack_get(old->nfct); 517 new->nfctinfo = old->nfctinfo; 518 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 519 new->nfct_reasm = old->nfct_reasm; 520 nf_conntrack_get_reasm(old->nfct_reasm); 521 #endif 522 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 523 new->ipvs_property = old->ipvs_property; 524 #endif 525 #ifdef CONFIG_BRIDGE_NETFILTER 526 new->nf_bridge = old->nf_bridge; 527 nf_bridge_get(old->nf_bridge); 528 #endif 529 #endif 530 #ifdef CONFIG_NET_SCHED 531 #ifdef CONFIG_NET_CLS_ACT 532 new->tc_verd = old->tc_verd; 533 #endif 534 new->tc_index = old->tc_index; 535 #endif 536 skb_copy_secmark(new, old); 537 atomic_set(&new->users, 1); 538 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; 539 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; 540 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; 541 } 542 543 /** 544 * skb_copy - create private copy of an sk_buff 545 * @skb: buffer to copy 546 * @gfp_mask: allocation priority 547 * 548 * Make a copy of both an &sk_buff and its data. This is used when the 549 * caller wishes to modify the data and needs a private copy of the 550 * data to alter. Returns %NULL on failure or the pointer to the buffer 551 * on success. The returned buffer has a reference count of 1. 552 * 553 * As by-product this function converts non-linear &sk_buff to linear 554 * one, so that &sk_buff becomes completely private and caller is allowed 555 * to modify all the data of returned buffer. This means that this 556 * function is not recommended for use in circumstances when only 557 * header is going to be modified. Use pskb_copy() instead. 558 */ 559 560 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) 561 { 562 int headerlen = skb->data - skb->head; 563 /* 564 * Allocate the copy buffer 565 */ 566 struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, 567 gfp_mask); 568 if (!n) 569 return NULL; 570 571 /* Set the data pointer */ 572 skb_reserve(n, headerlen); 573 /* Set the tail pointer and length */ 574 skb_put(n, skb->len); 575 n->csum = skb->csum; 576 n->ip_summed = skb->ip_summed; 577 578 if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) 579 BUG(); 580 581 copy_skb_header(n, skb); 582 return n; 583 } 584 585 586 /** 587 * pskb_copy - create copy of an sk_buff with private head. 588 * @skb: buffer to copy 589 * @gfp_mask: allocation priority 590 * 591 * Make a copy of both an &sk_buff and part of its data, located 592 * in header. Fragmented data remain shared. This is used when 593 * the caller wishes to modify only header of &sk_buff and needs 594 * private copy of the header to alter. Returns %NULL on failure 595 * or the pointer to the buffer on success. 596 * The returned buffer has a reference count of 1. 
597 */ 598 599 struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 600 { 601 /* 602 * Allocate the copy buffer 603 */ 604 struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); 605 606 if (!n) 607 goto out; 608 609 /* Set the data pointer */ 610 skb_reserve(n, skb->data - skb->head); 611 /* Set the tail pointer and length */ 612 skb_put(n, skb_headlen(skb)); 613 /* Copy the bytes */ 614 memcpy(n->data, skb->data, n->len); 615 n->csum = skb->csum; 616 n->ip_summed = skb->ip_summed; 617 618 n->data_len = skb->data_len; 619 n->len = skb->len; 620 621 if (skb_shinfo(skb)->nr_frags) { 622 int i; 623 624 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 625 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 626 get_page(skb_shinfo(n)->frags[i].page); 627 } 628 skb_shinfo(n)->nr_frags = i; 629 } 630 631 if (skb_shinfo(skb)->frag_list) { 632 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 633 skb_clone_fraglist(n); 634 } 635 636 copy_skb_header(n, skb); 637 out: 638 return n; 639 } 640 641 /** 642 * pskb_expand_head - reallocate header of &sk_buff 643 * @skb: buffer to reallocate 644 * @nhead: room to add at head 645 * @ntail: room to add at tail 646 * @gfp_mask: allocation priority 647 * 648 * Expands (or creates identical copy, if &nhead and &ntail are zero) 649 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 650 * reference count of 1. Returns zero in the case of success or error, 651 * if expansion failed. In the last case, &sk_buff is not changed. 652 * 653 * All the pointers pointing into skb header may change and must be 654 * reloaded after call to this function. 655 */ 656 657 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, 658 gfp_t gfp_mask) 659 { 660 int i; 661 u8 *data; 662 int size = nhead + (skb->end - skb->head) + ntail; 663 long off; 664 665 if (skb_shared(skb)) 666 BUG(); 667 668 size = SKB_DATA_ALIGN(size); 669 670 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 671 if (!data) 672 goto nodata; 673 674 /* Copy only real data... and, alas, header. This should be 675 * optimized for the cases when header is void. 
*/ 676 memcpy(data + nhead, skb->head, skb->tail - skb->head); 677 memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); 678 679 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 680 get_page(skb_shinfo(skb)->frags[i].page); 681 682 if (skb_shinfo(skb)->frag_list) 683 skb_clone_fraglist(skb); 684 685 skb_release_data(skb); 686 687 off = (data + nhead) - skb->head; 688 689 skb->head = data; 690 skb->end = data + size; 691 skb->data += off; 692 skb->tail += off; 693 skb->mac.raw += off; 694 skb->h.raw += off; 695 skb->nh.raw += off; 696 skb->cloned = 0; 697 skb->nohdr = 0; 698 atomic_set(&skb_shinfo(skb)->dataref, 1); 699 return 0; 700 701 nodata: 702 return -ENOMEM; 703 } 704 705 /* Make private copy of skb with writable head and some headroom */ 706 707 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 708 { 709 struct sk_buff *skb2; 710 int delta = headroom - skb_headroom(skb); 711 712 if (delta <= 0) 713 skb2 = pskb_copy(skb, GFP_ATOMIC); 714 else { 715 skb2 = skb_clone(skb, GFP_ATOMIC); 716 if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, 717 GFP_ATOMIC)) { 718 kfree_skb(skb2); 719 skb2 = NULL; 720 } 721 } 722 return skb2; 723 } 724 725 726 /** 727 * skb_copy_expand - copy and expand sk_buff 728 * @skb: buffer to copy 729 * @newheadroom: new free bytes at head 730 * @newtailroom: new free bytes at tail 731 * @gfp_mask: allocation priority 732 * 733 * Make a copy of both an &sk_buff and its data and while doing so 734 * allocate additional space. 735 * 736 * This is used when the caller wishes to modify the data and needs a 737 * private copy of the data to alter as well as more space for new fields. 738 * Returns %NULL on failure or the pointer to the buffer 739 * on success. The returned buffer has a reference count of 1. 740 * 741 * You must pass %GFP_ATOMIC as the allocation priority if this function 742 * is called from an interrupt. 743 * 744 * BUG ALERT: ip_summed is not copied. Why does this work? Is it used 745 * only by netfilter in the cases when checksum is recalculated? --ANK 746 */ 747 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 748 int newheadroom, int newtailroom, 749 gfp_t gfp_mask) 750 { 751 /* 752 * Allocate the copy buffer 753 */ 754 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 755 gfp_mask); 756 int head_copy_len, head_copy_off; 757 758 if (!n) 759 return NULL; 760 761 skb_reserve(n, newheadroom); 762 763 /* Set the tail pointer and length */ 764 skb_put(n, skb->len); 765 766 head_copy_len = skb_headroom(skb); 767 head_copy_off = 0; 768 if (newheadroom <= head_copy_len) 769 head_copy_len = newheadroom; 770 else 771 head_copy_off = newheadroom - head_copy_len; 772 773 /* Copy the linear header and data. */ 774 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, 775 skb->len + head_copy_len)) 776 BUG(); 777 778 copy_skb_header(n, skb); 779 780 return n; 781 } 782 783 /** 784 * skb_pad - zero pad the tail of an skb 785 * @skb: buffer to pad 786 * @pad: space to pad 787 * 788 * Ensure that a buffer is followed by a padding area that is zero 789 * filled. Used by network drivers which may DMA or transfer data 790 * beyond the buffer end onto the wire. 791 * 792 * May return error in out of memory cases. The skb is freed on error. 793 */ 794 795 int skb_pad(struct sk_buff *skb, int pad) 796 { 797 int err; 798 int ntail; 799 800 /* If the skbuff is non linear tailroom is always zero.. 
*/ 801 if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { 802 memset(skb->data+skb->len, 0, pad); 803 return 0; 804 } 805 806 ntail = skb->data_len + pad - (skb->end - skb->tail); 807 if (likely(skb_cloned(skb) || ntail > 0)) { 808 err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); 809 if (unlikely(err)) 810 goto free_skb; 811 } 812 813 /* FIXME: The use of this function with non-linear skb's really needs 814 * to be audited. 815 */ 816 err = skb_linearize(skb); 817 if (unlikely(err)) 818 goto free_skb; 819 820 memset(skb->data + skb->len, 0, pad); 821 return 0; 822 823 free_skb: 824 kfree_skb(skb); 825 return err; 826 } 827 828 /* Trims skb to length len. It can change skb pointers. 829 */ 830 831 int ___pskb_trim(struct sk_buff *skb, unsigned int len) 832 { 833 int offset = skb_headlen(skb); 834 int nfrags = skb_shinfo(skb)->nr_frags; 835 int i; 836 837 for (i = 0; i < nfrags; i++) { 838 int end = offset + skb_shinfo(skb)->frags[i].size; 839 if (end > len) { 840 if (skb_cloned(skb)) { 841 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 842 return -ENOMEM; 843 } 844 if (len <= offset) { 845 put_page(skb_shinfo(skb)->frags[i].page); 846 skb_shinfo(skb)->nr_frags--; 847 } else { 848 skb_shinfo(skb)->frags[i].size = len - offset; 849 } 850 } 851 offset = end; 852 } 853 854 if (offset < len) { 855 skb->data_len -= skb->len - len; 856 skb->len = len; 857 } else { 858 if (len <= skb_headlen(skb)) { 859 skb->len = len; 860 skb->data_len = 0; 861 skb->tail = skb->data + len; 862 if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) 863 skb_drop_fraglist(skb); 864 } else { 865 skb->data_len -= skb->len - len; 866 skb->len = len; 867 } 868 } 869 870 return 0; 871 } 872 873 /** 874 * __pskb_pull_tail - advance tail of skb header 875 * @skb: buffer to reallocate 876 * @delta: number of bytes to advance tail 877 * 878 * The function makes a sense only on a fragmented &sk_buff, 879 * it expands header moving its tail forward and copying necessary 880 * data from fragmented part. 881 * 882 * &sk_buff MUST have reference count of 1. 883 * 884 * Returns %NULL (and &sk_buff does not change) if pull failed 885 * or value of new tail of skb in the case of success. 886 * 887 * All the pointers pointing into skb header may change and must be 888 * reloaded after call to this function. 889 */ 890 891 /* Moves tail of skb head forward, copying data from fragmented part, 892 * when it is necessary. 893 * 1. It may fail due to malloc failure. 894 * 2. It may change skb pointers. 895 * 896 * It is pretty complicated. Luckily, it is called only in exceptional cases. 897 */ 898 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) 899 { 900 /* If skb has not enough free space at tail, get new one 901 * plus 128 bytes for future expansions. If we have enough 902 * room at tail, reallocate without expansion only if skb is cloned. 903 */ 904 int i, k, eat = (skb->tail + delta) - skb->end; 905 906 if (eat > 0 || skb_cloned(skb)) { 907 if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, 908 GFP_ATOMIC)) 909 return NULL; 910 } 911 912 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 913 BUG(); 914 915 /* Optimization: no fragments, no reasons to preestimate 916 * size of pulled pages. Superb. 917 */ 918 if (!skb_shinfo(skb)->frag_list) 919 goto pull_pages; 920 921 /* Estimate size of pulled pages. 
 */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to the skb data,
	 * but, taking into account that pulling is expected to be a
	 * very rare operation, it is worth fighting against further
	 * bloating of the skb head and crucifying ourselves here instead.
	 * Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			BUG_ON(!list);

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb->tail += delta;
	skb->data_len -= delta;

	return skb->tail;
}

/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int i, copy;
	int start = skb_headlen(skb);

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header.
*/ 1022 if ((copy = start - offset) > 0) { 1023 if (copy > len) 1024 copy = len; 1025 memcpy(to, skb->data + offset, copy); 1026 if ((len -= copy) == 0) 1027 return 0; 1028 offset += copy; 1029 to += copy; 1030 } 1031 1032 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1033 int end; 1034 1035 BUG_TRAP(start <= offset + len); 1036 1037 end = start + skb_shinfo(skb)->frags[i].size; 1038 if ((copy = end - offset) > 0) { 1039 u8 *vaddr; 1040 1041 if (copy > len) 1042 copy = len; 1043 1044 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 1045 memcpy(to, 1046 vaddr + skb_shinfo(skb)->frags[i].page_offset+ 1047 offset - start, copy); 1048 kunmap_skb_frag(vaddr); 1049 1050 if ((len -= copy) == 0) 1051 return 0; 1052 offset += copy; 1053 to += copy; 1054 } 1055 start = end; 1056 } 1057 1058 if (skb_shinfo(skb)->frag_list) { 1059 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1060 1061 for (; list; list = list->next) { 1062 int end; 1063 1064 BUG_TRAP(start <= offset + len); 1065 1066 end = start + list->len; 1067 if ((copy = end - offset) > 0) { 1068 if (copy > len) 1069 copy = len; 1070 if (skb_copy_bits(list, offset - start, 1071 to, copy)) 1072 goto fault; 1073 if ((len -= copy) == 0) 1074 return 0; 1075 offset += copy; 1076 to += copy; 1077 } 1078 start = end; 1079 } 1080 } 1081 if (!len) 1082 return 0; 1083 1084 fault: 1085 return -EFAULT; 1086 } 1087 1088 /** 1089 * skb_store_bits - store bits from kernel buffer to skb 1090 * @skb: destination buffer 1091 * @offset: offset in destination 1092 * @from: source buffer 1093 * @len: number of bytes to copy 1094 * 1095 * Copy the specified number of bytes from the source buffer to the 1096 * destination skb. This function handles all the messy bits of 1097 * traversing fragment lists and such. 1098 */ 1099 1100 int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len) 1101 { 1102 int i, copy; 1103 int start = skb_headlen(skb); 1104 1105 if (offset > (int)skb->len - len) 1106 goto fault; 1107 1108 if ((copy = start - offset) > 0) { 1109 if (copy > len) 1110 copy = len; 1111 memcpy(skb->data + offset, from, copy); 1112 if ((len -= copy) == 0) 1113 return 0; 1114 offset += copy; 1115 from += copy; 1116 } 1117 1118 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1119 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1120 int end; 1121 1122 BUG_TRAP(start <= offset + len); 1123 1124 end = start + frag->size; 1125 if ((copy = end - offset) > 0) { 1126 u8 *vaddr; 1127 1128 if (copy > len) 1129 copy = len; 1130 1131 vaddr = kmap_skb_frag(frag); 1132 memcpy(vaddr + frag->page_offset + offset - start, 1133 from, copy); 1134 kunmap_skb_frag(vaddr); 1135 1136 if ((len -= copy) == 0) 1137 return 0; 1138 offset += copy; 1139 from += copy; 1140 } 1141 start = end; 1142 } 1143 1144 if (skb_shinfo(skb)->frag_list) { 1145 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1146 1147 for (; list; list = list->next) { 1148 int end; 1149 1150 BUG_TRAP(start <= offset + len); 1151 1152 end = start + list->len; 1153 if ((copy = end - offset) > 0) { 1154 if (copy > len) 1155 copy = len; 1156 if (skb_store_bits(list, offset - start, 1157 from, copy)) 1158 goto fault; 1159 if ((len -= copy) == 0) 1160 return 0; 1161 offset += copy; 1162 from += copy; 1163 } 1164 start = end; 1165 } 1166 } 1167 if (!len) 1168 return 0; 1169 1170 fault: 1171 return -EFAULT; 1172 } 1173 1174 EXPORT_SYMBOL(skb_store_bits); 1175 1176 /* Checksum skb data. 
*/ 1177 1178 unsigned int skb_checksum(const struct sk_buff *skb, int offset, 1179 int len, unsigned int csum) 1180 { 1181 int start = skb_headlen(skb); 1182 int i, copy = start - offset; 1183 int pos = 0; 1184 1185 /* Checksum header. */ 1186 if (copy > 0) { 1187 if (copy > len) 1188 copy = len; 1189 csum = csum_partial(skb->data + offset, copy, csum); 1190 if ((len -= copy) == 0) 1191 return csum; 1192 offset += copy; 1193 pos = copy; 1194 } 1195 1196 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1197 int end; 1198 1199 BUG_TRAP(start <= offset + len); 1200 1201 end = start + skb_shinfo(skb)->frags[i].size; 1202 if ((copy = end - offset) > 0) { 1203 unsigned int csum2; 1204 u8 *vaddr; 1205 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1206 1207 if (copy > len) 1208 copy = len; 1209 vaddr = kmap_skb_frag(frag); 1210 csum2 = csum_partial(vaddr + frag->page_offset + 1211 offset - start, copy, 0); 1212 kunmap_skb_frag(vaddr); 1213 csum = csum_block_add(csum, csum2, pos); 1214 if (!(len -= copy)) 1215 return csum; 1216 offset += copy; 1217 pos += copy; 1218 } 1219 start = end; 1220 } 1221 1222 if (skb_shinfo(skb)->frag_list) { 1223 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1224 1225 for (; list; list = list->next) { 1226 int end; 1227 1228 BUG_TRAP(start <= offset + len); 1229 1230 end = start + list->len; 1231 if ((copy = end - offset) > 0) { 1232 unsigned int csum2; 1233 if (copy > len) 1234 copy = len; 1235 csum2 = skb_checksum(list, offset - start, 1236 copy, 0); 1237 csum = csum_block_add(csum, csum2, pos); 1238 if ((len -= copy) == 0) 1239 return csum; 1240 offset += copy; 1241 pos += copy; 1242 } 1243 start = end; 1244 } 1245 } 1246 BUG_ON(len); 1247 1248 return csum; 1249 } 1250 1251 /* Both of above in one bottle. */ 1252 1253 unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, 1254 u8 *to, int len, unsigned int csum) 1255 { 1256 int start = skb_headlen(skb); 1257 int i, copy = start - offset; 1258 int pos = 0; 1259 1260 /* Copy header. 
*/ 1261 if (copy > 0) { 1262 if (copy > len) 1263 copy = len; 1264 csum = csum_partial_copy_nocheck(skb->data + offset, to, 1265 copy, csum); 1266 if ((len -= copy) == 0) 1267 return csum; 1268 offset += copy; 1269 to += copy; 1270 pos = copy; 1271 } 1272 1273 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1274 int end; 1275 1276 BUG_TRAP(start <= offset + len); 1277 1278 end = start + skb_shinfo(skb)->frags[i].size; 1279 if ((copy = end - offset) > 0) { 1280 unsigned int csum2; 1281 u8 *vaddr; 1282 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1283 1284 if (copy > len) 1285 copy = len; 1286 vaddr = kmap_skb_frag(frag); 1287 csum2 = csum_partial_copy_nocheck(vaddr + 1288 frag->page_offset + 1289 offset - start, to, 1290 copy, 0); 1291 kunmap_skb_frag(vaddr); 1292 csum = csum_block_add(csum, csum2, pos); 1293 if (!(len -= copy)) 1294 return csum; 1295 offset += copy; 1296 to += copy; 1297 pos += copy; 1298 } 1299 start = end; 1300 } 1301 1302 if (skb_shinfo(skb)->frag_list) { 1303 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1304 1305 for (; list; list = list->next) { 1306 unsigned int csum2; 1307 int end; 1308 1309 BUG_TRAP(start <= offset + len); 1310 1311 end = start + list->len; 1312 if ((copy = end - offset) > 0) { 1313 if (copy > len) 1314 copy = len; 1315 csum2 = skb_copy_and_csum_bits(list, 1316 offset - start, 1317 to, copy, 0); 1318 csum = csum_block_add(csum, csum2, pos); 1319 if ((len -= copy) == 0) 1320 return csum; 1321 offset += copy; 1322 to += copy; 1323 pos += copy; 1324 } 1325 start = end; 1326 } 1327 } 1328 BUG_ON(len); 1329 return csum; 1330 } 1331 1332 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 1333 { 1334 unsigned int csum; 1335 long csstart; 1336 1337 if (skb->ip_summed == CHECKSUM_HW) 1338 csstart = skb->h.raw - skb->data; 1339 else 1340 csstart = skb_headlen(skb); 1341 1342 BUG_ON(csstart > skb_headlen(skb)); 1343 1344 memcpy(to, skb->data, csstart); 1345 1346 csum = 0; 1347 if (csstart != skb->len) 1348 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, 1349 skb->len - csstart, 0); 1350 1351 if (skb->ip_summed == CHECKSUM_HW) { 1352 long csstuff = csstart + skb->csum; 1353 1354 *((unsigned short *)(to + csstuff)) = csum_fold(csum); 1355 } 1356 } 1357 1358 /** 1359 * skb_dequeue - remove from the head of the queue 1360 * @list: list to dequeue from 1361 * 1362 * Remove the head of the list. The list lock is taken so the function 1363 * may be used safely with other locking list functions. The head item is 1364 * returned or %NULL if the list is empty. 1365 */ 1366 1367 struct sk_buff *skb_dequeue(struct sk_buff_head *list) 1368 { 1369 unsigned long flags; 1370 struct sk_buff *result; 1371 1372 spin_lock_irqsave(&list->lock, flags); 1373 result = __skb_dequeue(list); 1374 spin_unlock_irqrestore(&list->lock, flags); 1375 return result; 1376 } 1377 1378 /** 1379 * skb_dequeue_tail - remove from the tail of the queue 1380 * @list: list to dequeue from 1381 * 1382 * Remove the tail of the list. The list lock is taken so the function 1383 * may be used safely with other locking list functions. The tail item is 1384 * returned or %NULL if the list is empty. 
 */
struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue_tail(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}

/**
 * skb_queue_purge - empty a list
 * @list: list to empty
 *
 * Delete all buffers on an &sk_buff list. Each buffer is removed from
 * the list and one reference dropped. This function takes the list
 * lock and is atomic with respect to other list locking functions.
 */
void skb_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;
	while ((skb = skb_dequeue(list)) != NULL)
		kfree_skb(skb);
}

/**
 * skb_queue_head - queue a buffer at the list head
 * @list: list to use
 * @newsk: buffer to queue
 *
 * Queue a buffer at the start of the list. This function takes the
 * list lock and can be used safely with other locking &sk_buff functions.
 *
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_head(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}

/**
 * skb_queue_tail - queue a buffer at the list tail
 * @list: list to use
 * @newsk: buffer to queue
 *
 * Queue a buffer at the tail of the list. This function takes the
 * list lock and can be used safely with other locking &sk_buff functions.
 *
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_tail(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}

/**
 * skb_unlink - remove a buffer from a list
 * @skb: buffer to remove
 * @list: list to use
 *
 * Remove a packet from a list. The list locks are taken and this
 * function is atomic with respect to other list locked calls.
 *
 * You must know what list the SKB is on.
 */
void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_unlink(skb, list);
	spin_unlock_irqrestore(&list->lock, flags);
}

/**
 * skb_append - append a buffer
 * @old: buffer to insert after
 * @newsk: buffer to insert
 * @list: list to use
 *
 * Place a packet after a given packet in a list. The list locks are taken
 * and this function is atomic with respect to other list locked calls.
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_append(old, newsk, list);
	spin_unlock_irqrestore(&list->lock, flags);
}


/**
 * skb_insert - insert a buffer
 * @old: buffer to insert before
 * @newsk: buffer to insert
 * @list: list to use
 *
 * Place a packet before a given packet in a list. The list locks are
 * taken and this function is atomic with respect to other list locked
 * calls.
 *
 * A buffer cannot be placed on two lists at the same time.
 */
void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_insert(newsk, old->prev, old, list);
	spin_unlock_irqrestore(&list->lock, flags);
}

#if 0
/*
 *	Tune the memory allocator for a new MTU size.
 */
void skb_add_mtu(int mtu)
{
	/* Must match allocation in alloc_skb */
	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

	kmem_add_cache_size(mtu);
}
#endif

static inline void skb_split_inside_header(struct sk_buff *skb,
					   struct sk_buff* skb1,
					   const u32 len, const int pos)
{
	int i;

	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);

	/* And move data appendix as is. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
	skb_shinfo(skb)->nr_frags = 0;
	skb1->data_len = skb->data_len;
	skb1->len += skb1->data_len;
	skb->data_len = 0;
	skb->len = len;
	skb->tail = skb->data + len;
}

static inline void skb_split_no_header(struct sk_buff *skb,
				       struct sk_buff* skb1,
				       const u32 len, int pos)
{
	int i, k = 0;
	const int nfrags = skb_shinfo(skb)->nr_frags;

	skb_shinfo(skb)->nr_frags = 0;
	skb1->len = skb1->data_len = skb->len - len;
	skb->len = len;
	skb->data_len = len - pos;

	for (i = 0; i < nfrags; i++) {
		int size = skb_shinfo(skb)->frags[i].size;

		if (pos + size > len) {
			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

			if (pos < len) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move all the frag to the second
				 *    part, if it is possible. F.e.
				 *    this approach is mandatory for TUX,
				 *    where splitting is expensive.
				 * 2. Split accurately. This is what we
				 *    do here.
				 */
				get_page(skb_shinfo(skb)->frags[i].page);
				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
				skb_shinfo(skb1)->frags[0].size -= len - pos;
				skb_shinfo(skb)->frags[i].size = len - pos;
				skb_shinfo(skb)->nr_frags++;
			}
			k++;
		} else
			skb_shinfo(skb)->nr_frags++;
		pos += size;
	}
	skb_shinfo(skb1)->nr_frags = k;
}

/**
 * skb_split - Split fragmented skb to two parts at length len.
 * @skb: the buffer to split
 * @skb1: the buffer to receive the second part
 * @len: new length for skb
 */
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
	int pos = skb_headlen(skb);

	if (len < pos)	/* Split line is inside header. */
		skb_split_inside_header(skb, skb1, len, pos);
	else		/* Second chunk has no header, nothing to copy. */
		skb_split_no_header(skb, skb1, len, pos);
}

/**
 * skb_prepare_seq_read - Prepare a sequential read of skb data
 * @skb: the buffer to read
 * @from: lower offset of data to be read
 * @to: upper offset of data to be read
 * @st: state variable
 *
 * Initializes the specified state variable. Must be called before
 * invoking skb_seq_read() for the first time.
 */
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
			  unsigned int to, struct skb_seq_state *st)
{
	st->lower_offset = from;
	st->upper_offset = to;
	st->root_skb = st->cur_skb = skb;
	st->frag_idx = st->stepped_offset = 0;
	st->frag_data = NULL;
}

/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at &consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to &data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. &consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note: The size of each block of data returned can be arbitrary,
 *       this limitation is the cost for zerocopy sequential
 *       reads of potentially non-linear data.
 *
 * Note: Fragment lists within fragments are not implemented
 *       at the moment, state->root_skb could be replaced with
 *       a stack for this purpose.
 *
 * A usage sketch appears at the end of this file.
 */
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st)
{
	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
	skb_frag_t *frag;

	if (unlikely(abs_offset >= st->upper_offset))
		return 0;

next_skb:
	block_limit = skb_headlen(st->cur_skb);

	if (abs_offset < block_limit) {
		*data = st->cur_skb->data + abs_offset;
		return block_limit - abs_offset;
	}

	if (st->frag_idx == 0 && !st->frag_data)
		st->stepped_offset += skb_headlen(st->cur_skb);

	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
		block_limit = frag->size + st->stepped_offset;

		if (abs_offset < block_limit) {
			if (!st->frag_data)
				st->frag_data = kmap_skb_frag(frag);

			*data = (u8 *) st->frag_data + frag->page_offset +
				(abs_offset - st->stepped_offset);

			return block_limit - abs_offset;
		}

		if (st->frag_data) {
			kunmap_skb_frag(st->frag_data);
			st->frag_data = NULL;
		}

		st->frag_idx++;
		st->stepped_offset += frag->size;
	}

	if (st->cur_skb->next) {
		st->cur_skb = st->cur_skb->next;
		st->frag_idx = 0;
		goto next_skb;
	} else if (st->root_skb == st->cur_skb &&
		   skb_shinfo(st->root_skb)->frag_list) {
		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
		goto next_skb;
	}

	return 0;
}

/**
 * skb_abort_seq_read - Abort a sequential read of skb data
 * @st: state variable
 *
 * Must be called if skb_seq_read() was not called until it
 * returned 0.
 */
void skb_abort_seq_read(struct skb_seq_state *st)
{
	if (st->frag_data)
		kunmap_skb_frag(st->frag_data);
}

#define TS_SKB_CB(state)	((struct skb_seq_state *) &((state)->cb))

static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
					  struct ts_config *conf,
					  struct ts_state *state)
{
	return skb_seq_read(offset, text, TS_SKB_CB(state));
}

static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
{
	skb_abort_seq_read(TS_SKB_CB(state));
}

/**
 * skb_find_text - Find a text pattern in skb data
 * @skb: the buffer to look in
 * @from: search offset
 * @to: search limit
 * @config: textsearch configuration
 * @state: uninitialized textsearch state variable
 *
 * Finds a pattern in the skb data according to the specified
 * textsearch configuration. Use textsearch_next() to retrieve
 * subsequent occurrences of the pattern. Returns the offset
 * to the first occurrence or UINT_MAX if no match was found.
 */
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
			   unsigned int to, struct ts_config *config,
			   struct ts_state *state)
{
	unsigned int ret;

	config->get_next_block = skb_ts_get_next_block;
	config->finish = skb_ts_finish;

	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));

	ret = textsearch_find(config, state);
	return (ret <= to - from ? ret : UINT_MAX);
}

/**
 * skb_append_datato_frags - append the user data to a skb
 * @sk: sock structure
 * @skb: skb structure to be appended with user data.
 * @getfrag: call back function to be used for getting the user data
 * @from: pointer to user message iov
 * @length: length of the iov message
 *
 * Description: This procedure appends the user data to the fragment part
 * of the skb. If any page allocation fails, this procedure returns -ENOMEM.
 */
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
			int (*getfrag)(void *from, char *to, int offset,
					int len, int odd, struct sk_buff *skb),
			void *from, int length)
{
	int frg_cnt = 0;
	skb_frag_t *frag = NULL;
	struct page *page = NULL;
	int copy, left;
	int offset = 0;
	int ret;

	do {
		/* Return error if we don't have space for new frag */
		frg_cnt = skb_shinfo(skb)->nr_frags;
		if (frg_cnt >= MAX_SKB_FRAGS)
			return -EFAULT;

		/* allocate a new page for next frag */
		page = alloc_pages(sk->sk_allocation, 0);

		/* If alloc_page fails just return failure and caller will
		 * free previous allocated pages by doing kfree_skb()
		 */
		if (page == NULL)
			return -ENOMEM;

		/* initialize the next frag */
		sk->sk_sndmsg_page = page;
		sk->sk_sndmsg_off = 0;
		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
		skb->truesize += PAGE_SIZE;
		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);

		/* get the new initialized frag */
		frg_cnt = skb_shinfo(skb)->nr_frags;
		frag = &skb_shinfo(skb)->frags[frg_cnt - 1];

		/* copy the user data to page */
		left = PAGE_SIZE - frag->page_offset;
		copy = (length > left) ?
			left : length;

		ret = getfrag(from, (page_address(frag->page) +
			    frag->page_offset + frag->size),
			    offset, copy, 0, skb);
		if (ret < 0)
			return -EFAULT;

		/* copy was successful so update the size parameters */
		sk->sk_sndmsg_off += copy;
		frag->size += copy;
		skb->len += copy;
		skb->data_len += copy;
		offset += copy;
		length -= copy;

	} while (length > 0);

	return 0;
}

/**
 * skb_pull_rcsum - pull skb and update receive checksum
 * @skb: buffer to update
 * @len: length of data pulled
 *
 * This function performs an skb_pull on the packet and updates
 * the CHECKSUM_HW checksum. It should be used on receive
 * path processing instead of skb_pull unless you know that the
 * checksum difference is zero (e.g., a valid IP header) or you
 * are setting ip_summed to CHECKSUM_NONE.
 */
unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
	BUG_ON(len > skb->len);
	skb->len -= len;
	BUG_ON(skb->len < skb->data_len);
	skb_postpull_rcsum(skb, skb->data, len);
	return skb->data += len;
}

EXPORT_SYMBOL_GPL(skb_pull_rcsum);

/**
 * skb_segment - Perform protocol segmentation on skb.
 * @skb: buffer to segment
 * @features: features for the output path (see dev->features)
 *
 * This function performs segmentation on the given skb. It returns
 * the segment at the given position. It returns NULL if there are
 * no more segments to generate, or when an error is encountered.
 */
struct sk_buff *skb_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = NULL;
	struct sk_buff *tail = NULL;
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int doffset = skb->data - skb->mac.raw;
	unsigned int offset = doffset;
	unsigned int headroom;
	unsigned int len;
	int sg = features & NETIF_F_SG;
	int nfrags = skb_shinfo(skb)->nr_frags;
	int err = -ENOMEM;
	int i = 0;
	int pos;

	__skb_push(skb, doffset);
	headroom = skb_headroom(skb);
	pos = skb_headlen(skb);

	do {
		struct sk_buff *nskb;
		skb_frag_t *frag;
		int hsize, nsize;
		int k;
		int size;

		len = skb->len - offset;
		if (len > mss)
			len = mss;

		hsize = skb_headlen(skb) - offset;
		if (hsize < 0)
			hsize = 0;
		nsize = hsize + doffset;
		if (nsize > len + doffset || !sg)
			nsize = len + doffset;

		nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
		if (unlikely(!nskb))
			goto err;

		if (segs)
			tail->next = nskb;
		else
			segs = nskb;
		tail = nskb;

		nskb->dev = skb->dev;
		nskb->priority = skb->priority;
		nskb->protocol = skb->protocol;
		nskb->dst = dst_clone(skb->dst);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		nskb->pkt_type = skb->pkt_type;
		nskb->mac_len = skb->mac_len;

		skb_reserve(nskb, headroom);
		nskb->mac.raw = nskb->data;
		nskb->nh.raw = nskb->data + skb->mac_len;
		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
		memcpy(skb_put(nskb, doffset), skb->data, doffset);

		if (!sg) {
			nskb->csum = skb_copy_and_csum_bits(skb, offset,
							    skb_put(nskb, len),
							    len, 0);
			continue;
		}

		frag = skb_shinfo(nskb)->frags;
		k = 0;

		nskb->ip_summed = CHECKSUM_HW;
		nskb->csum = skb->csum;
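		/*
		 * Scatter-gather path: only the linear part of this
		 * segment's payload (hsize bytes) is copied into the new
		 * head below; the paged fragments overlapping
		 * [offset, offset + len) are then mapped into the segment,
		 * taking an extra page reference for each and trimming the
		 * first and last fragment to the segment boundaries.
		 */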
		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);

		while (pos < offset + len) {
			BUG_ON(i >= nfrags);

			*frag = skb_shinfo(skb)->frags[i];
			get_page(frag->page);
			size = frag->size;

			if (pos < offset) {
				frag->page_offset += offset - pos;
				frag->size -= offset - pos;
			}

			k++;

			if (pos + size <= offset + len) {
				i++;
				pos += size;
			} else {
				frag->size -= pos + size - (offset + len);
				break;
			}

			frag++;
		}

		skb_shinfo(nskb)->nr_frags = k;
		nskb->data_len = len - hsize;
		nskb->len += nskb->data_len;
		nskb->truesize += nskb->data_len;
	} while ((offset += len) < skb->len);

	return segs;

err:
	while ((skb = segs)) {
		segs = skb->next;
		/* Release the whole segment (data, dst and frag page
		 * references), not just the sk_buff structure itself.
		 */
		kfree_skb(skb);
	}
	return ERR_PTR(err);
}

EXPORT_SYMBOL_GPL(skb_segment);

void __init skb_init(void)
{
	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      NULL, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						(2*sizeof(struct sk_buff)) +
						sizeof(atomic_t),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL, NULL);
	if (!skbuff_fclone_cache)
		panic("cannot create skbuff fclone cache");
}

EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
EXPORT_SYMBOL(skb_clone);
EXPORT_SYMBOL(skb_clone_fraglist);
EXPORT_SYMBOL(skb_copy);
EXPORT_SYMBOL(skb_copy_and_csum_bits);
EXPORT_SYMBOL(skb_copy_and_csum_dev);
EXPORT_SYMBOL(skb_copy_bits);
EXPORT_SYMBOL(skb_copy_expand);
EXPORT_SYMBOL(skb_over_panic);
EXPORT_SYMBOL(skb_pad);
EXPORT_SYMBOL(skb_realloc_headroom);
EXPORT_SYMBOL(skb_under_panic);
EXPORT_SYMBOL(skb_dequeue);
EXPORT_SYMBOL(skb_dequeue_tail);
EXPORT_SYMBOL(skb_insert);
EXPORT_SYMBOL(skb_queue_purge);
EXPORT_SYMBOL(skb_queue_head);
EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
EXPORT_SYMBOL(skb_prepare_seq_read);
EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
EXPORT_SYMBOL(skb_append_datato_frags);
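/*
 * Usage sketch (not compiled, mirroring the #if 0 convention used for
 * skb_add_mtu() above): a minimal illustration of the allocation, copy,
 * queueing and sequential-read helpers defined in this file.  The
 * function name, the queue and the payload below are made up for this
 * example only; they are not part of the kernel API.
 */
#if 0
static void skbuff_usage_sketch(struct net_device *dev)
{
	static const char payload[] = "example payload";	/* illustrative data */
	struct sk_buff_head queue;
	struct skb_seq_state st;
	struct sk_buff *skb, *copy;
	const u8 *block;
	unsigned int len, consumed = 0;

	/* Allocate a buffer with 16 bytes of headroom and copy data in. */
	skb = alloc_skb(16 + sizeof(payload), GFP_ATOMIC);
	if (!skb)
		return;
	skb_reserve(skb, 16);				/* reserve headroom */
	memcpy(skb_put(skb, sizeof(payload)), payload, sizeof(payload));
	skb->dev = dev;

	/* skb_copy() gives a fully private, linear copy of header and data. */
	copy = skb_copy(skb, GFP_ATOMIC);
	if (copy)
		kfree_skb(copy);

	/* The queue helpers take the list lock, so no extra locking is needed. */
	skb_queue_head_init(&queue);
	skb_queue_tail(&queue, skb);
	skb = skb_dequeue(&queue);

	/* Walk the (possibly non-linear) data with the seq-read helpers. */
	skb_prepare_seq_read(skb, 0, skb->len, &st);
	while ((len = skb_seq_read(consumed, &block, &st)) != 0) {
		/* 'block' points at 'len' contiguous bytes of skb data. */
		consumed += len;
	}
	/* skb_abort_seq_read(&st) would be required if we stopped early. */

	kfree_skb(skb);
	skb_queue_purge(&queue);
}
#endif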