1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd November 18, 2005 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 55.\" 56.Ss Mbuf utility macros 57.Fn mtod "struct mbuf *mbuf" "type" 58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 60.Ft int 61.Fn M_LEADINGSPACE "struct mbuf *mbuf" 62.Ft int 63.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 67.Ft int 68.Fn M_WRITABLE "struct mbuf *mbuf" 69.\" 70.Ss Mbuf allocation functions 71.Ft struct mbuf * 72.Fn m_get "int how" "int type" 73.Ft struct mbuf * 74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 75.Ft struct mbuf * 76.Fn m_getcl "int how" "short type" "int flags" 77.Ft struct mbuf * 78.Fn m_getclr "int how" "int type" 79.Ft struct mbuf * 80.Fn m_gethdr "int how" "int type" 81.Ft struct mbuf * 82.Fn m_free "struct mbuf *mbuf" 83.Ft void 84.Fn m_freem "struct mbuf *mbuf" 85.\" 86.Ss Mbuf utility functions 87.Ft void 88.Fn m_adj "struct mbuf *mbuf" "int len" 89.Ft void 90.Fn m_align "struct mbuf *mbuf" "int len" 91.Ft int 92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 97.Ft struct 
mbuf * 98.Fn m_pullup "struct mbuf *mbuf" "int len" 99.Ft struct mbuf * 100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 101.Ft struct mbuf * 102.Fn m_copypacket "struct mbuf *mbuf" "int how" 103.Ft struct mbuf * 104.Fn m_dup "struct mbuf *mbuf" "int how" 105.Ft void 106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft void 108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 109.Ft struct mbuf * 110.Fo m_devget 111.Fa "char *buf" 112.Fa "int len" 113.Fa "int offset" 114.Fa "struct ifnet *ifp" 115.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 116.Fc 117.Ft void 118.Fn m_cat "struct mbuf *m" "struct mbuf *n" 119.Ft u_int 120.Fn m_fixhdr "struct mbuf *mbuf" 121.Ft void 122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" 123.Ft void 124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 125.Ft u_int 126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 127.Ft struct mbuf * 128.Fn m_split "struct mbuf *mbuf" "int len" "int how" 129.Ft int 130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 131.Ft struct mbuf * 132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 133.Ft struct mbuf * 134.Fn m_defrag "struct mbuf *m0" "int how" 135.\" 136.Sh DESCRIPTION 137An 138.Vt mbuf 139is a basic unit of memory management in the kernel IPC subsystem. 140Network packets and socket buffers are stored in 141.Vt mbufs . 142A network packet may span multiple 143.Vt mbufs 144arranged into a 145.Vt mbuf chain 146(linked list), 147which allows adding or trimming 148network headers with little overhead. 149.Pp 150While a developer should not bother with 151.Vt mbuf 152internals without serious 153reason in order to avoid incompatibilities with future changes, it 154is useful to understand the general structure of an 155.Vt mbuf . 156.Pp 157An 158.Vt mbuf 159consists of a variable-sized header and a small internal 160buffer for data. 
161The total size of an 162.Vt mbuf , 163.Dv MSIZE , 164is a constant defined in 165.In sys/param.h . 166The 167.Vt mbuf 168header includes: 169.Pp 170.Bl -tag -width "m_nextpkt" -offset indent 171.It Va m_next 172.Pq Vt struct mbuf * 173A pointer to the next 174.Vt mbuf 175in the 176.Vt mbuf chain . 177.It Va m_nextpkt 178.Pq Vt struct mbuf * 179A pointer to the next 180.Vt mbuf chain 181in the queue. 182.It Va m_data 183.Pq Vt caddr_t 184A pointer to data attached to this 185.Vt mbuf . 186.It Va m_len 187.Pq Vt int 188The length of the data. 189.It Va m_type 190.Pq Vt short 191The type of the data. 192.It Va m_flags 193.Pq Vt int 194The 195.Vt mbuf 196flags. 197.El 198.Pp 199The 200.Vt mbuf 201flag bits are defined as follows: 202.Bd -literal 203/* mbuf flags */ 204#define M_EXT 0x0001 /* has associated external storage */ 205#define M_PKTHDR 0x0002 /* start of record */ 206#define M_EOR 0x0004 /* end of record */ 207#define M_RDONLY 0x0008 /* associated data marked read-only */ 208#define M_PROTO1 0x0010 /* protocol-specific */ 209#define M_PROTO2 0x0020 /* protocol-specific */ 210#define M_PROTO3 0x0040 /* protocol-specific */ 211#define M_PROTO4 0x0080 /* protocol-specific */ 212#define M_PROTO5 0x0100 /* protocol-specific */ 213#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 214#define M_FREELIST 0x8000 /* mbuf is on the free list */ 215 216/* mbuf pkthdr flags (also stored in m_flags) */ 217#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 218#define M_MCAST 0x0400 /* send/received as link-level multicast */ 219#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 220#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 221#define M_LASTFRAG 0x2000 /* packet is last fragment */ 222.Ed 223.Pp 224The available 225.Vt mbuf 226types are defined as follows: 227.Bd -literal 228/* mbuf types */ 229#define MT_DATA 1 /* dynamic (data) allocation */ 230#define MT_HEADER 2 /* packet header */ 231#define MT_SONAME 
8 /* socket name */ 232#define MT_FTABLE 11 /* fragment reassembly header */ 233#define MT_CONTROL 14 /* extra-data protocol message */ 234#define MT_OOBDATA 15 /* expedited data */ 235.Ed 236.Pp 237If the 238.Dv M_PKTHDR 239flag is set, a 240.Vt struct pkthdr Va m_pkthdr 241is added to the 242.Vt mbuf 243header. 244It contains a pointer to the interface 245the packet has been received from 246.Pq Vt struct ifnet Va *rcvif , 247and the total packet length 248.Pq Vt int Va len . 249Optionally, it may also contain an attached list of packet tags 250.Pq Vt "struct m_tag" . 251See 252.Xr mbuf_tags 9 253for details. 254Fields used in offloading checksum calculation to the hardware are kept in 255.Va m_pkthdr 256as well. 257See 258.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 259for details. 260.Pp 261If small enough, data is stored in the internal data buffer of an 262.Vt mbuf . 263If the data is sufficiently large, another 264.Vt mbuf 265may be added to the 266.Vt mbuf chain , 267or external storage may be associated with the 268.Vt mbuf . 269.Dv MHLEN 270bytes of data can fit into an 271.Vt mbuf 272with the 273.Dv M_PKTHDR 274flag set, 275.Dv MLEN 276bytes can otherwise. 277.Pp 278If external storage is being associated with an 279.Vt mbuf , 280the 281.Va m_ext 282header is added at the cost of losing the internal data buffer. 283It includes a pointer to external storage, the size of the storage, 284a pointer to a function used for freeing the storage, 285a pointer to an optional argument that can be passed to the function, 286and a pointer to a reference counter. 287An 288.Vt mbuf 289using external storage has the 290.Dv M_EXT 291flag set. 292.Pp 293The system supplies a macro for allocating the desired external storage 294buffer, 295.Dv MEXTADD . 296.Pp 297The allocation and management of the reference counter is handled by the 298subsystem. 299.Pp 300The system also supplies a default type of external storage buffer called an 301.Vt mbuf cluster . 
302.Vt Mbuf clusters 303can be allocated and configured with the use of the 304.Dv MCLGET 305macro. 306Each 307.Vt mbuf cluster 308is 309.Dv MCLBYTES 310in size, where MCLBYTES is a machine-dependent constant. 311The system defines an advisory macro 312.Dv MINCLSIZE , 313which is the smallest amount of data to put into an 314.Vt mbuf cluster . 315It is equal to the sum of 316.Dv MLEN 317and 318.Dv MHLEN . 319It is typically preferable to store data into the data region of an 320.Vt mbuf , 321if size permits, as opposed to allocating a separate 322.Vt mbuf cluster 323to hold the same data. 324.\" 325.Ss Macros and Functions 326There are numerous predefined macros and functions that provide the 327developer with common utilities. 328.\" 329.Bl -ohang -offset indent 330.It Fn mtod mbuf type 331Convert an 332.Fa mbuf 333pointer to a data pointer. 334The macro expands to the data pointer cast to the pointer of the specified 335.Fa type . 336.Sy Note : 337It is advisable to ensure that there is enough contiguous data in 338.Fa mbuf . 339See 340.Fn m_pullup 341for details. 342.It Fn MGET mbuf how type 343Allocate an 344.Vt mbuf 345and initialize it to contain internal data. 346.Fa mbuf 347will point to the allocated 348.Vt mbuf 349on success, or be set to 350.Dv NULL 351on failure. 352The 353.Fa how 354argument is to be set to 355.Dv M_TRYWAIT 356or 357.Dv M_DONTWAIT . 358It specifies whether the caller is willing to block if necessary. 359If 360.Fa how 361is set to 362.Dv M_TRYWAIT , 363a failed allocation will result in the caller being put 364to sleep for a designated 365kern.ipc.mbuf_wait 366.Xr ( sysctl 8 367tunable) 368number of ticks. 369A number of other functions and macros related to 370.Vt mbufs 371have the same argument because they may 372at some point need to allocate new 373.Vt mbufs . 374.Pp 375Programmers should be careful not to confuse the 376.Vt mbuf 377allocation flag 378.Dv M_DONTWAIT 379with the 380.Xr malloc 9 381allocation flag, 382.Dv M_NOWAIT . 
383They are not the same. 384.It Fn MGETHDR mbuf how type 385Allocate an 386.Vt mbuf 387and initialize it to contain a packet header 388and internal data. 389See 390.Fn MGET 391for details. 392.It Fn MCLGET mbuf how 393Allocate and attach an 394.Vt mbuf cluster 395to 396.Fa mbuf . 397If the macro fails, the 398.Dv M_EXT 399flag will not be set in 400.Fa mbuf . 401.It Fn M_ALIGN mbuf len 402Set the pointer 403.Fa mbuf->m_data 404to place an object of the size 405.Fa len 406at the end of the internal data area of 407.Fa mbuf , 408long word aligned. 409Applicable only if 410.Fa mbuf 411is newly allocated with 412.Fn MGET 413or 414.Fn m_get . 415.It Fn MH_ALIGN mbuf len 416Serves the same purpose as 417.Fn M_ALIGN 418does, but only for 419.Fa mbuf 420newly allocated with 421.Fn MGETHDR 422or 423.Fn m_gethdr , 424or initialized by 425.Fn m_dup_pkthdr 426or 427.Fn m_move_pkthdr . 428.It Fn m_align mbuf len 429Serves the same purpose as 430.Fn M_ALIGN 431but handles any type of mbuf. 432.It Fn M_LEADINGSPACE mbuf 433Returns the number of bytes available before the beginning 434of data in 435.Fa mbuf . 436.It Fn M_TRAILINGSPACE mbuf 437Returns the number of bytes available after the end of data in 438.Fa mbuf . 439.It Fn M_PREPEND mbuf len how 440This macro operates on an 441.Vt mbuf chain . 442It is an optimized wrapper for 443.Fn m_prepend 444that can make use of possible empty space before data 445(e.g.\& left after trimming of a link-layer header). 446The new 447.Vt mbuf chain 448pointer or 449.Dv NULL 450is in 451.Fa mbuf 452after the call. 453.It Fn M_MOVE_PKTHDR to from 454Using this macro is equivalent to calling 455.Fn m_move_pkthdr to from . 456.It Fn M_WRITABLE mbuf 457This macro will evaluate true if 458.Fa mbuf 459is not marked 460.Dv M_RDONLY 461and if either 462.Fa mbuf 463does not contain external storage or, 464if it does, 465then if the reference count of the storage is not greater than 1. 
466The 467.Dv M_RDONLY 468flag can be set in 469.Fa mbuf->m_flags . 470This can be achieved during setup of the external storage, 471by passing the 472.Dv M_RDONLY 473bit as a 474.Fa flags 475argument to the 476.Fn MEXTADD 477macro, or can be directly set in individual 478.Vt mbufs . 479.It Fn MCHTYPE mbuf type 480Change the type of 481.Fa mbuf 482to 483.Fa type . 484This is a relatively expensive operation and should be avoided. 485.El 486.Pp 487The functions are: 488.Bl -ohang -offset indent 489.It Fn m_get how type 490A function version of 491.Fn MGET 492for non-critical paths. 493.It Fn m_getm orig len how type 494Allocate 495.Fa len 496bytes worth of 497.Vt mbufs 498and 499.Vt mbuf clusters 500if necessary and append the resulting allocated 501.Vt mbuf chain 502to the 503.Vt mbuf chain 504.Fa orig , 505if it is 506.No non- Ns Dv NULL . 507If the allocation fails at any point, 508free whatever was allocated and return 509.Dv NULL . 510If 511.Fa orig 512is 513.No non- Ns Dv NULL , 514it will not be freed. 515It is possible to use 516.Fn m_getm 517to either append 518.Fa len 519bytes to an existing 520.Vt mbuf 521or 522.Vt mbuf chain 523(for example, one which may be sitting in a pre-allocated ring) 524or to simply perform an all-or-nothing 525.Vt mbuf 526and 527.Vt mbuf cluster 528allocation. 529.It Fn m_gethdr how type 530A function version of 531.Fn MGETHDR 532for non-critical paths. 533.It Fn m_getcl how type flags 534Fetch an 535.Vt mbuf 536with a 537.Vt mbuf cluster 538attached to it. 539If one of the allocations fails, the entire allocation fails. 540This routine is the preferred way of fetching both the 541.Vt mbuf 542and 543.Vt mbuf cluster 544together, as it avoids having to unlock/relock between allocations. 545Returns 546.Dv NULL 547on failure. 548.It Fn m_getclr how type 549Allocate an 550.Vt mbuf 551and zero out the data region. 552.It Fn m_free mbuf 553Frees 554.Fa mbuf . 555Returns 556.Va m_next 557of the freed 558.Vt mbuf . 
559.El 560.Pp 561The functions below operate on 562.Vt mbuf chains . 563.Bl -ohang -offset indent 564.It Fn m_freem mbuf 565Free an entire 566.Vt mbuf chain , 567including any external storage. 568.\" 569.It Fn m_adj mbuf len 570Trim 571.Fa len 572bytes from the head of an 573.Vt mbuf chain 574if 575.Fa len 576is positive, from the tail otherwise. 577.\" 578.It Fn m_append mbuf len cp 579Append 580.Fa len 581bytes of data 582.Fa cp 583to the 584.Vt mbuf chain . 585Extend the mbuf chain if the new data does not fit in 586existing space. 587.\" 588.It Fn m_prepend mbuf len how 589Allocate a new 590.Vt mbuf 591and prepend it to the 592.Vt mbuf chain , 593handling 594.Dv M_PKTHDR 595properly. 596.Sy Note : 597It does not allocate any 598.Vt mbuf clusters , 599so 600.Fa len 601must be less than 602.Dv MLEN 603or 604.Dv MHLEN , 605depending on the 606.Dv M_PKTHDR 607flag setting. 608.\" 609.It Fn m_copyup mbuf len dstoff 610Similar to 611.Fn m_pullup 612but copies 613.Fa len 614bytes of data into a new mbuf at 615.Fa dstoff 616bytes into the mbuf. 617The 618.Fa dstoff 619argument aligns the data and leaves room for a link layer header. 620Returns the new 621.Vt mbuf chain 622on success, 623and frees the 624.Vt mbuf chain 625and returns 626.Dv NULL 627on failure. 628.Sy Note : 629The function does not allocate 630.Vt mbuf clusters , 631so 632.Fa len + dstoff 633must be less than 634.Dv MHLEN . 635.\" 636.It Fn m_pullup mbuf len 637Arrange that the first 638.Fa len 639bytes of an 640.Vt mbuf chain 641are contiguous and lie in the data area of 642.Fa mbuf , 643so they are accessible with 644.Fn mtod mbuf type . 645Return the new 646.Vt mbuf chain 647on success, 648.Dv NULL 649on failure 650(the 651.Vt mbuf chain 652is freed in this case). 653.Sy Note : 654It does not allocate any 655.Vt mbuf clusters , 656so 657.Fa len 658must be less than 659.Dv MHLEN . 
660.\" 661.It Fn m_copym mbuf offset len how 662Make a copy of an 663.Vt mbuf chain 664starting 665.Fa offset 666bytes from the beginning, continuing for 667.Fa len 668bytes. 669If 670.Fa len 671is 672.Dv M_COPYALL , 673copy to the end of the 674.Vt mbuf chain . 675.Sy Note : 676The copy is read-only, because the 677.Vt mbuf clusters 678are not copied, only their reference counts are incremented. 679.\" 680.It Fn m_copypacket mbuf how 681Copy an entire packet including header, which must be present. 682This is an optimized version of the common case 683.Fn m_copym mbuf 0 M_COPYALL how . 684.Sy Note : 685The copy is read-only, because the 686.Vt mbuf clusters 687are not copied, only their reference counts are incremented. 688.\" 689.It Fn m_dup mbuf how 690Copy a packet header 691.Vt mbuf chain 692into a completely new 693.Vt mbuf chain , 694including copying any 695.Vt mbuf clusters . 696Use this instead of 697.Fn m_copypacket 698when you need a writable copy of an 699.Vt mbuf chain . 700.\" 701.It Fn m_copydata mbuf offset len buf 702Copy data from an 703.Vt mbuf chain 704starting 705.Fa offset 706bytes from the beginning, continuing for 707.Fa len 708bytes, into the indicated buffer 709.Fa buf . 710.\" 711.It Fn m_copyback mbuf offset len buf 712Copy 713.Fa len 714bytes from the buffer 715.Fa buf 716back into the indicated 717.Vt mbuf chain , 718starting at 719.Fa offset 720bytes from the beginning of the 721.Vt mbuf chain , 722extending the 723.Vt mbuf chain 724if necessary. 725.Sy Note : 726It does not allocate any 727.Vt mbuf clusters , 728just adds 729.Vt mbufs 730to the 731.Vt mbuf chain . 732It is safe to set 733.Fa offset 734beyond the current 735.Vt mbuf chain 736end: zeroed 737.Vt mbufs 738will be allocated to fill the space. 739.\" 740.It Fn m_length mbuf last 741Return the length of the 742.Vt mbuf chain , 743and optionally a pointer to the last 744.Vt mbuf . 
745.\" 746.It Fn m_dup_pkthdr to from how 747Upon the function's completion, the 748.Vt mbuf 749.Fa to 750will contain an identical copy of 751.Fa from->m_pkthdr 752and the per-packet attributes found in the 753.Vt mbuf chain 754.Fa from . 755The 756.Vt mbuf 757.Fa from 758must have the flag 759.Dv M_PKTHDR 760initially set, and 761.Fa to 762must be empty on entry. 763.\" 764.It Fn m_move_pkthdr to from 765Move 766.Va m_pkthdr 767and the per-packet attributes from the 768.Vt mbuf chain 769.Fa from 770to the 771.Vt mbuf 772.Fa to . 773The 774.Vt mbuf 775.Fa from 776must have the flag 777.Dv M_PKTHDR 778initially set, and 779.Fa to 780must be empty on entry. 781Upon the function's completion, 782.Fa from 783will have the flag 784.Dv M_PKTHDR 785and the per-packet attributes cleared. 786.\" 787.It Fn m_fixhdr mbuf 788Set the packet-header length to the length of the 789.Vt mbuf chain . 790.\" 791.It Fn m_devget buf len offset ifp copy 792Copy data from a device local memory pointed to by 793.Fa buf 794to an 795.Vt mbuf chain . 796The copy is done using a specified copy routine 797.Fa copy , 798or 799.Fn bcopy 800if 801.Fa copy 802is 803.Dv NULL . 804.\" 805.It Fn m_cat m n 806Concatenate 807.Fa n 808to 809.Fa m . 810Both 811.Vt mbuf chains 812must be of the same type. 813.Fa N 814is still valid after the function returned. 815.Sy Note : 816It does not handle 817.Dv M_PKTHDR 818and friends. 819.\" 820.It Fn m_split mbuf len how 821Partition an 822.Vt mbuf chain 823in two pieces, returning the tail: 824all but the first 825.Fa len 826bytes. 827In case of failure, it returns 828.Dv NULL 829and attempts to restore the 830.Vt mbuf chain 831to its original state. 832.\" 833.It Fn m_apply mbuf off len f arg 834Apply a function to an 835.Vt mbuf chain , 836at offset 837.Fa off , 838for length 839.Fa len 840bytes. 841Typically used to avoid calls to 842.Fn m_pullup 843which would otherwise be unnecessary or undesirable. 
844.Fa arg 845is a convenience argument which is passed to the callback function 846.Fa f . 847.Pp 848Each time 849.Fn f 850is called, it will be passed 851.Fa arg , 852a pointer to the 853.Fa data 854in the current mbuf, and the length 855.Fa len 856of the data in this mbuf to which the function should be applied. 857.Pp 858The function should return zero to indicate success; 859otherwise, if an error is indicated, then 860.Fn m_apply 861will return the error and stop iterating through the 862.Vt mbuf chain . 863.\" 864.It Fn m_getptr mbuf loc off 865Return a pointer to the mbuf containing the data located at 866.Fa loc 867bytes from the beginning of the 868.Vt mbuf chain . 869The corresponding offset into the mbuf will be stored in 870.Fa *off . 871.It Fn m_defrag m0 how 872Defragment an mbuf chain, returning the shortest possible 873chain of mbufs and clusters. 874If allocation fails and this can not be completed, 875.Dv NULL 876will be returned and the original chain will be unchanged. 877Upon success, the original chain will be freed and the new 878chain will be returned. 879.Fa how 880should be either 881.Dv M_TRYWAIT 882or 883.Dv M_DONTWAIT , 884depending on the caller's preference. 885.Pp 886This function is especially useful in network drivers, where 887certain long mbuf chains must be shortened before being added 888to TX descriptor lists. 889.El 890.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 891This section currently applies to TCP/IP only. 892In order to save the host CPU resources, computing checksums is 893offloaded to the network interface hardware if possible. 894The 895.Va m_pkthdr 896member of the leading 897.Vt mbuf 898of a packet contains two fields used for that purpose, 899.Vt int Va csum_flags 900and 901.Vt int Va csum_data . 902The meaning of those fields depends on the direction a packet flows in, 903and on whether the packet is fragmented. 
904Henceforth, 905.Va csum_flags 906or 907.Va csum_data 908of a packet 909will denote the corresponding field of the 910.Va m_pkthdr 911member of the leading 912.Vt mbuf 913in the 914.Vt mbuf chain 915containing the packet. 916.Pp 917On output, checksum offloading is attempted after the outgoing 918interface has been determined for a packet. 919The interface-specific field 920.Va ifnet.if_data.ifi_hwassist 921(see 922.Xr ifnet 9 ) 923is consulted for the capabilities of the interface to assist in 924computing checksums. 925The 926.Va csum_flags 927field of the packet header is set to indicate which actions the interface 928is supposed to perform on it. 929The actions unsupported by the network interface are done in the 930software prior to passing the packet down to the interface driver; 931such actions will never be requested through 932.Va csum_flags . 933.Pp 934The flags demanding a particular action from an interface are as follows: 935.Bl -tag -width ".Dv CSUM_TCP" -offset indent 936.It Dv CSUM_IP 937The IP header checksum is to be computed and stored in the 938corresponding field of the packet. 939The hardware is expected to know the format of an IP header 940to determine the offset of the IP checksum field. 941.It Dv CSUM_TCP 942The TCP checksum is to be computed. 943(See below.) 944.It Dv CSUM_UDP 945The UDP checksum is to be computed. 946(See below.) 947.El 948.Pp 949Should a TCP or UDP checksum be offloaded to the hardware, 950the field 951.Va csum_data 952will contain the byte offset of the checksum field relative to the 953end of the IP header. 954In this case, the checksum field will be initially 955set by the TCP/IP module to the checksum of the pseudo header 956defined by the TCP and UDP specifications. 
957.Pp 958For outbound packets which have been fragmented 959by the host CPU, the following will also be true, 960regardless of the checksum flag settings: 961.Bl -bullet -offset indent 962.It 963all fragments will have the flag 964.Dv M_FRAG 965set in their 966.Va m_flags 967field; 968.It 969the first and the last fragments in the chain will have 970.Dv M_FIRSTFRAG 971or 972.Dv M_LASTFRAG 973set in their 974.Va m_flags , 975correspondingly; 976.It 977the first fragment in the chain will have the total number 978of fragments contained in its 979.Va csum_data 980field. 981.El 982.Pp 983The last rule for fragmented packets takes precedence over the one 984for a TCP or UDP checksum. 985Nevertheless, offloading a TCP or UDP checksum is possible for a 986fragmented packet if the flag 987.Dv CSUM_IP_FRAGS 988is set in the field 989.Va ifnet.if_data.ifi_hwassist 990associated with the network interface. 991However, in this case the interface is expected to figure out 992the location of the checksum field within the sequence of fragments 993by itself because 994.Va csum_data 995contains a fragment count instead of a checksum offset value. 996.Pp 997On input, an interface indicates the actions it has performed 998on a packet by setting one or more of the following flags in 999.Va csum_flags 1000associated with the packet: 1001.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1002.It Dv CSUM_IP_CHECKED 1003The IP header checksum has been computed. 1004.It Dv CSUM_IP_VALID 1005The IP header has a valid checksum. 1006This flag can appear only in combination with 1007.Dv CSUM_IP_CHECKED . 1008.It Dv CSUM_DATA_VALID 1009The checksum of the data portion of the IP packet has been computed 1010and stored in the field 1011.Va csum_data 1012in network byte order. 
1013.It Dv CSUM_PSEUDO_HDR 1014Can be set only along with 1015.Dv CSUM_DATA_VALID 1016to indicate that the IP data checksum found in 1017.Va csum_data 1018allows for the pseudo header defined by the TCP and UDP specifications. 1019Otherwise the checksum of the pseudo header must be calculated by 1020the host CPU and added to 1021.Va csum_data 1022to obtain the final checksum to be used for TCP or UDP validation purposes. 1023.El 1024.Pp 1025If a particular network interface just indicates success or 1026failure of TCP or UDP checksum validation without returning 1027the exact value of the checksum to the host CPU, its driver can mark 1028.Dv CSUM_DATA_VALID 1029and 1030.Dv CSUM_PSEUDO_HDR 1031in 1032.Va csum_flags , 1033and set 1034.Va csum_data 1035to 1036.Li 0xFFFF 1037hexadecimal to indicate a valid checksum. 1038It is a peculiarity of the algorithm used that the Internet checksum 1039calculated over any valid packet will be 1040.Li 0xFFFF 1041as long as the original checksum field is included. 1042.Pp 1043For inbound packets which are IP fragments, all 1044.Va csum_data 1045fields will be summed during reassembly to obtain the final checksum 1046value passed to an upper layer in the 1047.Va csum_data 1048field of the reassembled packet. 1049The 1050.Va csum_flags 1051fields of all fragments will be consolidated using logical AND 1052to obtain the final value for 1053.Va csum_flags . 1054Thus, in order to successfully 1055offload checksum computation for fragmented data, 1056all fragments should have the same value of 1057.Va csum_flags . 1058.Sh STRESS TESTING 1059When running a kernel compiled with the option 1060.Dv MBUF_STRESS_TEST , 1061the following 1062.Xr sysctl 8 Ns 1063-controlled options may be used to create 1064various failure/extreme cases for testing of network drivers 1065and other parts of the kernel that rely on 1066.Vt mbufs . 
1067.Bl -tag -width ident 1068.It Va net.inet.ip.mbuf_frag_size 1069Causes 1070.Fn ip_output 1071to fragment outgoing 1072.Vt mbuf chains 1073into fragments of the specified size. 1074Setting this variable to 1 is an excellent way to 1075test the long 1076.Vt mbuf chain 1077handling ability of network drivers. 1078.It Va kern.ipc.m_defragrandomfailures 1079Causes the function 1080.Fn m_defrag 1081to randomly fail, returning 1082.Dv NULL . 1083Any piece of code which uses 1084.Fn m_defrag 1085should be tested with this feature. 1086.El 1087.Sh RETURN VALUES 1088See above. 1089.Sh SEE ALSO 1090.Xr ifnet 9 , 1091.Xr mbuf_tags 9 1092.Sh HISTORY 1093.\" Please correct me if I'm wrong 1094.Vt Mbufs 1095appeared in an early version of 1096.Bx . 1097Besides being used for network packets, they were used 1098to store various dynamic structures, such as routing table 1099entries, interface addresses, protocol control blocks, etc. 1100.Sh AUTHORS 1101The original 1102.Nm 1103manual page was written by Yar Tikhiy. 1104