1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd August 7, 2005 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Fn mtod "struct mbuf *mbuf" "type" 60.Ft int 61.Fn MEXT_IS_REF "struct mbuf *mbuf" 62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 64.Ft int 65.Fn M_LEADINGSPACE "struct mbuf *mbuf" 66.Ft int 67.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 71.Ft int 72.Fn M_WRITABLE "struct mbuf *mbuf" 73.\" 74.Ss Mbuf allocation functions 75.Ft struct mbuf * 76.Fn m_get "int how" "int type" 77.Ft struct mbuf * 78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 79.Ft struct mbuf * 80.Fn m_getcl "int how" "short type" "int flags" 81.Ft struct mbuf * 82.Fn m_getclr "int how" "int type" 83.Ft struct mbuf * 84.Fn m_gethdr "int how" "int type" 85.Ft struct mbuf * 86.Fn m_free "struct mbuf *mbuf" 87.Ft void 88.Fn m_freem "struct mbuf *mbuf" 89.\" 90.Ss Mbuf utility functions 91.Ft void 92.Fn m_adj "struct mbuf *mbuf" "int len" 93.Ft void 94.Fn m_align "struct mbuf *mbuf" "int len" 95.Ft int 96.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 97.Ft struct mbuf * 98.Fn m_prepend "struct 
mbuf *mbuf" "int len" "int how" 99.Ft struct mbuf * 100.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 101.Ft struct mbuf * 102.Fn m_pullup "struct mbuf *mbuf" "int len" 103.Ft struct mbuf * 104.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 105.Ft struct mbuf * 106.Fn m_copypacket "struct mbuf *mbuf" "int how" 107.Ft struct mbuf * 108.Fn m_dup "struct mbuf *mbuf" "int how" 109.Ft void 110.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 111.Ft void 112.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 113.Ft struct mbuf * 114.Fo m_devget 115.Fa "char *buf" 116.Fa "int len" 117.Fa "int offset" 118.Fa "struct ifnet *ifp" 119.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 120.Fc 121.Ft void 122.Fn m_cat "struct mbuf *m" "struct mbuf *n" 123.Ft u_int 124.Fn m_fixhdr "struct mbuf *mbuf" 125.Ft void 126.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 127.Ft void 128.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 129.Ft u_int 130.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 131.Ft struct mbuf * 132.Fn m_split "struct mbuf *mbuf" "int len" "int how" 133.Ft int 134.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 135.Ft struct mbuf * 136.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 137.Ft struct mbuf * 138.Fn m_defrag "struct mbuf *m0" "int how" 139.\" 140.Sh DESCRIPTION 141An 142.Vt mbuf 143is a basic unit of memory management in the kernel IPC subsystem. 144Network packets and socket buffers are stored in 145.Vt mbufs . 146A network packet may span multiple 147.Vt mbufs 148arranged into a 149.Vt mbuf chain 150(linked list), 151which allows adding or trimming 152network headers with little overhead. 
153.Pp 154While a developer should not bother with 155.Vt mbuf 156internals without serious 157reason in order to avoid incompatibilities with future changes, it 158is useful to understand the general structure of an 159.Vt mbuf . 160.Pp 161An 162.Vt mbuf 163consists of a variable-sized header and a small internal 164buffer for data. 165The total size of an 166.Vt mbuf , 167.Dv MSIZE , 168is a constant defined in 169.In sys/param.h . 170The 171.Vt mbuf 172header includes: 173.Pp 174.Bl -tag -width "m_nextpkt" -offset indent 175.It Va m_next 176.Pq Vt struct mbuf * 177A pointer to the next 178.Vt mbuf 179in the 180.Vt mbuf chain . 181.It Va m_nextpkt 182.Pq Vt struct mbuf * 183A pointer to the next 184.Vt mbuf chain 185in the queue. 186.It Va m_data 187.Pq Vt caddr_t 188A pointer to data attached to this 189.Vt mbuf . 190.It Va m_len 191.Pq Vt int 192The length of the data. 193.It Va m_type 194.Pq Vt short 195The type of the data. 196.It Va m_flags 197.Pq Vt int 198The 199.Vt mbuf 200flags. 
201.El 202.Pp 203The 204.Vt mbuf 205flag bits are defined as follows: 206.Bd -literal 207/* mbuf flags */ 208#define M_EXT 0x0001 /* has associated external storage */ 209#define M_PKTHDR 0x0002 /* start of record */ 210#define M_EOR 0x0004 /* end of record */ 211#define M_RDONLY 0x0008 /* associated data marked read-only */ 212#define M_PROTO1 0x0010 /* protocol-specific */ 213#define M_PROTO2 0x0020 /* protocol-specific */ 214#define M_PROTO3 0x0040 /* protocol-specific */ 215#define M_PROTO4 0x0080 /* protocol-specific */ 216#define M_PROTO5 0x0100 /* protocol-specific */ 217#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 218#define M_FREELIST 0x8000 /* mbuf is on the free list */ 219 220/* mbuf pkthdr flags (also stored in m_flags) */ 221#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 222#define M_MCAST 0x0400 /* send/received as link-level multicast */ 223#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 224#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 225#define M_LASTFRAG 0x2000 /* packet is last fragment */ 226.Ed 227.Pp 228The available 229.Vt mbuf 230types are defined as follows: 231.Bd -literal 232/* mbuf types */ 233#define MT_DATA 1 /* dynamic (data) allocation */ 234#define MT_HEADER 2 /* packet header */ 235#define MT_SONAME 8 /* socket name */ 236#define MT_FTABLE 11 /* fragment reassembly header */ 237#define MT_CONTROL 14 /* extra-data protocol message */ 238#define MT_OOBDATA 15 /* expedited data */ 239.Ed 240.Pp 241If the 242.Dv M_PKTHDR 243flag is set, a 244.Vt struct pkthdr Va m_pkthdr 245is added to the 246.Vt mbuf 247header. 248It contains a pointer to the interface 249the packet has been received from 250.Pq Vt struct ifnet Va *rcvif , 251and the total packet length 252.Pq Vt int Va len . 253Optionally, it may also contain an attached list of packet tags 254.Pq Vt "struct m_tag" . 255See 256.Xr mbuf_tags 9 257for details. 
258Fields used in offloading checksum calculation to the hardware are kept in 259.Va m_pkthdr 260as well. 261See 262.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 263for details. 264.Pp 265If small enough, data is stored in the internal data buffer of an 266.Vt mbuf . 267If the data is sufficiently large, another 268.Vt mbuf 269may be added to the 270.Vt mbuf chain , 271or external storage may be associated with the 272.Vt mbuf . 273.Dv MHLEN 274bytes of data can fit into an 275.Vt mbuf 276with the 277.Dv M_PKTHDR 278flag set, 279.Dv MLEN 280bytes can otherwise. 281.Pp 282If external storage is being associated with an 283.Vt mbuf , 284the 285.Va m_ext 286header is added at the cost of losing the internal data buffer. 287It includes a pointer to external storage, the size of the storage, 288a pointer to a function used for freeing the storage, 289a pointer to an optional argument that can be passed to the function, 290and a pointer to a reference counter. 291An 292.Vt mbuf 293using external storage has the 294.Dv M_EXT 295flag set. 296.Pp 297The system supplies a macro for allocating the desired external storage 298buffer, 299.Dv MEXTADD . 300.Pp 301The allocation and management of the reference counter is handled by the 302subsystem. 303The developer can check whether the reference count for the 304external storage of a given 305.Vt mbuf 306is greater than 1 with the 307.Dv MEXT_IS_REF 308macro. 309Similarly, the developer can directly add and remove references, 310if absolutely necessary, with the use of the 311.Dv MEXT_ADD_REF 312and 313.Dv MEXT_REM_REF 314macros. 315.Pp 316The system also supplies a default type of external storage buffer called an 317.Vt mbuf cluster . 318.Vt Mbuf clusters 319can be allocated and configured with the use of the 320.Dv MCLGET 321macro. 322Each 323.Vt mbuf cluster 324is 325.Dv MCLBYTES 326in size, where MCLBYTES is a machine-dependent constant. 
327The system defines an advisory macro 328.Dv MINCLSIZE , 329which is the smallest amount of data to put into an 330.Vt mbuf cluster . 331It is equal to the sum of 332.Dv MLEN 333and 334.Dv MHLEN . 335It is typically preferable to store data into the data region of an 336.Vt mbuf , 337if size permits, as opposed to allocating a separate 338.Vt mbuf cluster 339to hold the same data. 340.\" 341.Ss Macros and Functions 342There are numerous predefined macros and functions that provide the 343developer with common utilities. 344.\" 345.Bl -ohang -offset indent 346.It Fn mtod mbuf type 347Convert an 348.Fa mbuf 349pointer to a data pointer. 350The macro expands to the data pointer cast to the pointer of the specified 351.Fa type . 352.Sy Note : 353It is advisable to ensure that there is enough contiguous data in 354.Fa mbuf . 355See 356.Fn m_pullup 357for details. 358.It Fn MGET mbuf how type 359Allocate an 360.Vt mbuf 361and initialize it to contain internal data. 362.Fa mbuf 363will point to the allocated 364.Vt mbuf 365on success, or be set to 366.Dv NULL 367on failure. 368The 369.Fa how 370argument is to be set to 371.Dv M_TRYWAIT 372or 373.Dv M_DONTWAIT . 374It specifies whether the caller is willing to block if necessary. 375If 376.Fa how 377is set to 378.Dv M_TRYWAIT , 379a failed allocation will result in the caller being put 380to sleep for a designated 381kern.ipc.mbuf_wait 382.Xr ( sysctl 8 383tunable) 384number of ticks. 385A number of other functions and macros related to 386.Vt mbufs 387have the same argument because they may 388at some point need to allocate new 389.Vt mbufs . 390.Pp 391Programmers should be careful not to confuse the 392.Vt mbuf 393allocation flag 394.Dv M_DONTWAIT 395with the 396.Xr malloc 9 397allocation flag, 398.Dv M_NOWAIT . 399They are not the same. 400.It Fn MGETHDR mbuf how type 401Allocate an 402.Vt mbuf 403and initialize it to contain a packet header 404and internal data. 405See 406.Fn MGET 407for details. 
408.It Fn MCLGET mbuf how 409Allocate and attach an 410.Vt mbuf cluster 411to 412.Fa mbuf . 413If the macro fails, the 414.Dv M_EXT 415flag will not be set in 416.Fa mbuf . 417.It Fn M_ALIGN mbuf len 418Set the pointer 419.Fa mbuf->m_data 420to place an object of the size 421.Fa len 422at the end of the internal data area of 423.Fa mbuf , 424long word aligned. 425Applicable only if 426.Fa mbuf 427is newly allocated with 428.Fn MGET 429or 430.Fn m_get . 431.It Fn MH_ALIGN mbuf len 432Serves the same purpose as 433.Fn M_ALIGN 434does, but only for 435.Fa mbuf 436newly allocated with 437.Fn MGETHDR 438or 439.Fn m_gethdr , 440or initialized by 441.Fn m_dup_pkthdr 442or 443.Fn m_move_pkthdr . 444.It Fn m_align mbuf len 445Serves the same purpose as 446.Fn M_ALIGN 447but handles any type of mbuf. 448.It Fn M_LEADINGSPACE mbuf 449Returns the number of bytes available before the beginning 450of data in 451.Fa mbuf . 452.It Fn M_TRAILINGSPACE mbuf 453Returns the number of bytes available after the end of data in 454.Fa mbuf . 455.It Fn M_PREPEND mbuf len how 456This macro operates on an 457.Vt mbuf chain . 458It is an optimized wrapper for 459.Fn m_prepend 460that can make use of possible empty space before data 461(e.g.\& left after trimming of a link-layer header). 462The new 463.Vt mbuf chain 464pointer or 465.Dv NULL 466is in 467.Fa mbuf 468after the call. 469.It Fn M_MOVE_PKTHDR to from 470Using this macro is equivalent to calling 471.Fn m_move_pkthdr to from . 472.It Fn M_WRITABLE mbuf 473This macro will evaluate true if 474.Fa mbuf 475is not marked 476.Dv M_RDONLY 477and if either 478.Fa mbuf 479does not contain external storage or, 480if it does, 481then if the reference count of the storage is not greater than 1. 482The 483.Dv M_RDONLY 484flag can be set in 485.Fa mbuf->m_flags . 
486This can be achieved during setup of the external storage, 487by passing the 488.Dv M_RDONLY 489bit as a 490.Fa flags 491argument to the 492.Fn MEXTADD 493macro, or can be directly set in individual 494.Vt mbufs . 495.It Fn MCHTYPE mbuf type 496Change the type of 497.Fa mbuf 498to 499.Fa type . 500This is a relatively expensive operation and should be avoided. 501.El 502.Pp 503The functions are: 504.Bl -ohang -offset indent 505.It Fn m_get how type 506A function version of 507.Fn MGET 508for non-critical paths. 509.It Fn m_getm orig len how type 510Allocate 511.Fa len 512bytes worth of 513.Vt mbufs 514and 515.Vt mbuf clusters 516if necessary and append the resulting allocated 517.Vt mbuf chain 518to the 519.Vt mbuf chain 520.Fa orig , 521if it is 522.No non- Ns Dv NULL . 523If the allocation fails at any point, 524free whatever was allocated and return 525.Dv NULL . 526If 527.Fa orig 528is 529.No non- Ns Dv NULL , 530it will not be freed. 531It is possible to use 532.Fn m_getm 533to either append 534.Fa len 535bytes to an existing 536.Vt mbuf 537or 538.Vt mbuf chain 539(for example, one which may be sitting in a pre-allocated ring) 540or to simply perform an all-or-nothing 541.Vt mbuf 542and 543.Vt mbuf cluster 544allocation. 545.It Fn m_gethdr how type 546A function version of 547.Fn MGETHDR 548for non-critical paths. 549.It Fn m_getcl how type flags 550Fetch an 551.Vt mbuf 552with a 553.Vt mbuf cluster 554attached to it. 555If one of the allocations fails, the entire allocation fails. 556This routine is the preferred way of fetching both the 557.Vt mbuf 558and 559.Vt mbuf cluster 560together, as it avoids having to unlock/relock between allocations. 561Returns 562.Dv NULL 563on failure. 564.It Fn m_getclr how type 565Allocate an 566.Vt mbuf 567and zero out the data region. 568.It Fn m_free mbuf 569Frees 570.Vt mbuf . 571Returns 572.Va m_next 573of the freed 574.Vt mbuf . 575.El 576.Pp 577The functions below operate on 578.Vt mbuf chains . 
579.Bl -ohang -offset indent 580.It Fn m_freem mbuf 581Free an entire 582.Vt mbuf chain , 583including any external storage. 584.\" 585.It Fn m_adj mbuf len 586Trim 587.Fa len 588bytes from the head of an 589.Vt mbuf chain 590if 591.Fa len 592is positive, from the tail otherwise. 593.\" 594.It Fn m_append mbuf len cp 595Append 596.Fa len 597bytes of data 598.Fa cp 599to the 600.Vt mbuf chain . 601Extend the mbuf chain if the new data does not fit in 602existing space. 603.\" 604.It Fn m_prepend mbuf len how 605Allocate a new 606.Vt mbuf 607and prepend it to the 608.Vt mbuf chain , 609handle 610.Dv M_PKTHDR 611properly. 612.Sy Note : 613It does not allocate any 614.Vt mbuf clusters , 615so 616.Fa len 617must be less than 618.Dv MLEN 619or 620.Dv MHLEN , 621depending on the 622.Dv M_PKTHDR 623flag setting. 624.\" 625.It Fn m_copyup mbuf len dstoff 626Similar to 627.Fn m_pullup 628but copies 629.Fa len 630bytes of data into a new mbuf at 631.Fa dstoff 632bytes into the mbuf. 633The 634.Fa dstoff 635argument aligns the data and leaves room for a link layer header. 636Returns the new 637.Vt mbuf chain 638on success, 639and frees the 640.Vt mbuf chain 641and returns 642.Dv NULL 643on failure. 644.Sy Note : 645The function does not allocate 646.Vt mbuf clusters , 647so 648.Fa len + dstoff 649must be less than 650.Dv MHLEN . 651.\" 652.It Fn m_pullup mbuf len 653Arrange that the first 654.Fa len 655bytes of an 656.Vt mbuf chain 657are contiguous and lie in the data area of 658.Fa mbuf , 659so they are accessible with 660.Fn mtod mbuf type . 661Return the new 662.Vt mbuf chain 663on success, 664.Dv NULL 665on failure 666(the 667.Vt mbuf chain 668is freed in this case). 669.Sy Note : 670It does not allocate any 671.Vt mbuf clusters , 672so 673.Fa len 674must be less than 675.Dv MHLEN . 676.\" 677.It Fn m_copym mbuf offset len how 678Make a copy of an 679.Vt mbuf chain 680starting 681.Fa offset 682bytes from the beginning, continuing for 683.Fa len 684bytes. 
685If 686.Fa len 687is 688.Dv M_COPYALL , 689copy to the end of the 690.Vt mbuf chain . 691.Sy Note : 692The copy is read-only, because the 693.Vt mbuf clusters 694are not copied, only their reference counts are incremented. 695.\" 696.It Fn m_copypacket mbuf how 697Copy an entire packet including header, which must be present. 698This is an optimized version of the common case 699.Fn m_copym mbuf 0 M_COPYALL how . 700.Sy Note : 701The copy is read-only, because the 702.Vt mbuf clusters 703are not copied, only their reference counts are incremented. 704.\" 705.It Fn m_dup mbuf how 706Copy a packet header 707.Vt mbuf chain 708into a completely new 709.Vt mbuf chain , 710including copying any 711.Vt mbuf clusters . 712Use this instead of 713.Fn m_copypacket 714when you need a writable copy of an 715.Vt mbuf chain . 716.\" 717.It Fn m_copydata mbuf offset len buf 718Copy data from an 719.Vt mbuf chain 720starting 721.Fa offset 722bytes from the beginning, continuing for 723.Fa len 724bytes, into the indicated buffer 725.Fa buf . 726.\" 727.It Fn m_copyback mbuf offset len buf 728Copy 729.Fa len 730bytes from the buffer 731.Fa buf 732back into the indicated 733.Vt mbuf chain , 734starting at 735.Fa offset 736bytes from the beginning of the 737.Vt mbuf chain , 738extending the 739.Vt mbuf chain 740if necessary. 741.Sy Note : 742It does not allocate any 743.Vt mbuf clusters , 744just adds 745.Vt mbufs 746to the 747.Vt mbuf chain . 748It is safe to set 749.Fa offset 750beyond the current 751.Vt mbuf chain 752end: zeroed 753.Vt mbufs 754will be allocated to fill the space. 755.\" 756.It Fn m_length mbuf last 757Return the length of the 758.Vt mbuf chain , 759and optionally a pointer to the last 760.Vt mbuf . 761.\" 762.It Fn m_dup_pkthdr to from how 763Upon the function's completion, the 764.Vt mbuf 765.Fa to 766will contain an identical copy of 767.Fa from->m_pkthdr 768and the per-packet attributes found in the 769.Vt mbuf chain 770.Fa from . 
771The 772.Vt mbuf 773.Fa from 774must have the flag 775.Dv M_PKTHDR 776initially set, and 777.Fa to 778must be empty on entry. 779.\" 780.It Fn m_move_pkthdr to from 781Move 782.Va m_pkthdr 783and the per-packet attributes from the 784.Vt mbuf chain 785.Fa from 786to the 787.Vt mbuf 788.Fa to . 789The 790.Vt mbuf 791.Fa from 792must have the flag 793.Dv M_PKTHDR 794initially set, and 795.Fa to 796must be empty on entry. 797Upon the function's completion, 798.Fa from 799will have the flag 800.Dv M_PKTHDR 801and the per-packet attributes cleared. 802.\" 803.It Fn m_fixhdr mbuf 804Set the packet-header length to the length of the 805.Vt mbuf chain . 806.\" 807.It Fn m_devget buf len offset ifp copy 808Copy data from a device local memory pointed to by 809.Fa buf 810to an 811.Vt mbuf chain . 812The copy is done using a specified copy routine 813.Fa copy , 814or 815.Fn bcopy 816if 817.Fa copy 818is 819.Dv NULL . 820.\" 821.It Fn m_cat m n 822Concatenate 823.Fa n 824to 825.Fa m . 826Both 827.Vt mbuf chains 828must be of the same type. 829.Fa N 830is still valid after the function returned. 831.Sy Note : 832It does not handle 833.Dv M_PKTHDR 834and friends. 835.\" 836.It Fn m_split mbuf len how 837Partition an 838.Vt mbuf chain 839in two pieces, returning the tail: 840all but the first 841.Fa len 842bytes. 843In case of failure, it returns 844.Dv NULL 845and attempts to restore the 846.Vt mbuf chain 847to its original state. 848.\" 849.It Fn m_apply mbuf off len f arg 850Apply a function to an 851.Vt mbuf chain , 852at offset 853.Fa off , 854for length 855.Fa len 856bytes. 857Typically used to avoid calls to 858.Fn m_pullup 859which would otherwise be unnecessary or undesirable. 860.Fa arg 861is a convenience argument which is passed to the callback function 862.Fa f . 
863.Pp 864Each time 865.Fn f 866is called, it will be passed 867.Fa arg , 868a pointer to the 869.Fa data 870in the current mbuf, and the length 871.Fa len 872of the data in this mbuf to which the function should be applied. 873.Pp 874The function should return zero to indicate success; 875otherwise, if an error is indicated, then 876.Fn m_apply 877will return the error and stop iterating through the 878.Vt mbuf chain . 879.\" 880.It Fn m_getptr mbuf loc off 881Return a pointer to the mbuf containing the data located at 882.Fa loc 883bytes from the beginning of the 884.Vt mbuf chain . 885The corresponding offset into the mbuf will be stored in 886.Fa *off . 887.It Fn m_defrag m0 how 888Defragment an mbuf chain, returning the shortest possible 889chain of mbufs and clusters. 890If allocation fails and this cannot be completed, 891.Dv NULL 892will be returned and the original chain will be unchanged. 893Upon success, the original chain will be freed and the new 894chain will be returned. 895.Fa how 896should be either 897.Dv M_TRYWAIT 898or 899.Dv M_DONTWAIT , 900depending on the caller's preference. 901.Pp 902This function is especially useful in network drivers, where 903certain long mbuf chains must be shortened before being added 904to TX descriptor lists. 905.El 906.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 907This section currently applies to TCP/IP only. 908In order to save the host CPU resources, computing checksums is 909offloaded to the network interface hardware if possible. 910The 911.Va m_pkthdr 912member of the leading 913.Vt mbuf 914of a packet contains two fields used for that purpose, 915.Vt int Va csum_flags 916and 917.Vt int Va csum_data . 918The meaning of those fields depends on the direction a packet flows in, 919and on whether the packet is fragmented. 
920Henceforth, 921.Va csum_flags 922or 923.Va csum_data 924of a packet 925will denote the corresponding field of the 926.Va m_pkthdr 927member of the leading 928.Vt mbuf 929in the 930.Vt mbuf chain 931containing the packet. 932.Pp 933On output, checksum offloading is attempted after the outgoing 934interface has been determined for a packet. 935The interface-specific field 936.Va ifnet.if_data.ifi_hwassist 937(see 938.Xr ifnet 9 ) 939is consulted for the capabilities of the interface to assist in 940computing checksums. 941The 942.Va csum_flags 943field of the packet header is set to indicate which actions the interface 944is supposed to perform on it. 945The actions unsupported by the network interface are done in the 946software prior to passing the packet down to the interface driver; 947such actions will never be requested through 948.Va csum_flags . 949.Pp 950The flags demanding a particular action from an interface are as follows: 951.Bl -tag -width ".Dv CSUM_TCP" -offset indent 952.It Dv CSUM_IP 953The IP header checksum is to be computed and stored in the 954corresponding field of the packet. 955The hardware is expected to know the format of an IP header 956to determine the offset of the IP checksum field. 957.It Dv CSUM_TCP 958The TCP checksum is to be computed. 959(See below.) 960.It Dv CSUM_UDP 961The UDP checksum is to be computed. 962(See below.) 963.El 964.Pp 965Should a TCP or UDP checksum be offloaded to the hardware, 966the field 967.Va csum_data 968will contain the byte offset of the checksum field relative to the 969end of the IP header. 970In this case, the checksum field will be initially 971set by the TCP/IP module to the checksum of the pseudo header 972defined by the TCP and UDP specifications. 
973.Pp 974For outbound packets which have been fragmented 975by the host CPU, the following will also be true, 976regardless of the checksum flag settings: 977.Bl -bullet -offset indent 978.It 979all fragments will have the flag 980.Dv M_FRAG 981set in their 982.Va m_flags 983field; 984.It 985the first and the last fragments in the chain will have 986.Dv M_FIRSTFRAG 987or 988.Dv M_LASTFRAG 989set in their 990.Va m_flags , 991correspondingly; 992.It 993the first fragment in the chain will have the total number 994of fragments contained in its 995.Va csum_data 996field. 997.El 998.Pp 999The last rule for fragmented packets takes precedence over the one 1000for a TCP or UDP checksum. 1001Nevertheless, offloading a TCP or UDP checksum is possible for a 1002fragmented packet if the flag 1003.Dv CSUM_IP_FRAGS 1004is set in the field 1005.Va ifnet.if_data.ifi_hwassist 1006associated with the network interface. 1007However, in this case the interface is expected to figure out 1008the location of the checksum field within the sequence of fragments 1009by itself because 1010.Va csum_data 1011contains a fragment count instead of a checksum offset value. 1012.Pp 1013On input, an interface indicates the actions it has performed 1014on a packet by setting one or more of the following flags in 1015.Va csum_flags 1016associated with the packet: 1017.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1018.It Dv CSUM_IP_CHECKED 1019The IP header checksum has been computed. 1020.It Dv CSUM_IP_VALID 1021The IP header has a valid checksum. 1022This flag can appear only in combination with 1023.Dv CSUM_IP_CHECKED . 1024.It Dv CSUM_DATA_VALID 1025The checksum of the data portion of the IP packet has been computed 1026and stored in the field 1027.Va csum_data 1028in network byte order. 
1029.It Dv CSUM_PSEUDO_HDR 1030Can be set only along with 1031.Dv CSUM_DATA_VALID 1032to indicate that the IP data checksum found in 1033.Va csum_data 1034allows for the pseudo header defined by the TCP and UDP specifications. 1035Otherwise the checksum of the pseudo header must be calculated by 1036the host CPU and added to 1037.Va csum_data 1038to obtain the final checksum to be used for TCP or UDP validation purposes. 1039.El 1040.Pp 1041If a particular network interface just indicates success or 1042failure of TCP or UDP checksum validation without returning 1043the exact value of the checksum to the host CPU, its driver can mark 1044.Dv CSUM_DATA_VALID 1045and 1046.Dv CSUM_PSEUDO_HDR 1047in 1048.Va csum_flags , 1049and set 1050.Va csum_data 1051to 1052.Li 0xFFFF 1053hexadecimal to indicate a valid checksum. 1054It is a peculiarity of the algorithm used that the Internet checksum 1055calculated over any valid packet will be 1056.Li 0xFFFF 1057as long as the original checksum field is included. 1058.Pp 1059For inbound packets which are IP fragments, all 1060.Va csum_data 1061fields will be summed during reassembly to obtain the final checksum 1062value passed to an upper layer in the 1063.Va csum_data 1064field of the reassembled packet. 1065The 1066.Va csum_flags 1067fields of all fragments will be consolidated using logical AND 1068to obtain the final value for 1069.Va csum_flags . 1070Thus, in order to successfully 1071offload checksum computation for fragmented data, 1072all fragments should have the same value of 1073.Va csum_flags . 1074.Sh STRESS TESTING 1075When running a kernel compiled with the option 1076.Dv MBUF_STRESS_TEST , 1077the following 1078.Xr sysctl 8 Ns 1079-controlled options may be used to create 1080various failure/extreme cases for testing of network drivers 1081and other parts of the kernel that rely on 1082.Vt mbufs . 
1083.Bl -tag -width indent 1084.It Va net.inet.ip.mbuf_frag_size 1085Causes 1086.Fn ip_output 1087to fragment outgoing 1088.Vt mbuf chains 1089into fragments of the specified size. 1090Setting this variable to 1 is an excellent way to 1091test the long 1092.Vt mbuf chain 1093handling ability of network drivers. 1094.It Va kern.ipc.m_defragrandomfailures 1095Causes the function 1096.Fn m_defrag 1097to randomly fail, returning 1098.Dv NULL . 1099Any piece of code which uses 1100.Fn m_defrag 1101should be tested with this feature. 1102.El 1103.Sh RETURN VALUES 1104See above. 1105.Sh SEE ALSO 1106.Xr ifnet 9 , 1107.Xr mbuf_tags 9 1108.Sh HISTORY 1109.\" Please correct me if I'm wrong 1110.Vt Mbufs 1111appeared in an early version of 1112.Bx . 1113Besides being used for network packets, they were used 1114to store various dynamic structures, such as routing table 1115entries, interface addresses, protocol control blocks, etc. 1116.Sh AUTHORS 1117The original 1118.Nm 1119manual page was written by Yar Tikhiy. 1120