1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd August 7, 2010 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_arg1, void *opt_arg2)" 49.Fa "void *opt_arg1" 50.Fa "void *opt_arg2" 51.Fa "short flags" 52.Fa "int type" 53.Fc 54.Fn MEXTFREE "struct mbuf *mbuf" 55.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 56.\" 57.Ss Mbuf utility macros 58.Fn mtod "struct mbuf *mbuf" "type" 59.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 60.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 61.Ft int 62.Fn M_LEADINGSPACE "struct mbuf *mbuf" 63.Ft int 64.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 65.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 66.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 67.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 68.Ft int 69.Fn M_WRITABLE "struct mbuf *mbuf" 70.\" 71.Ss Mbuf allocation functions 72.Ft struct mbuf * 73.Fn m_get "int how" "int type" 74.Ft struct mbuf * 75.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 76.Ft struct mbuf * 77.Fn m_getcl "int how" "short type" "int flags" 78.Ft struct mbuf * 79.Fn m_getclr "int how" "int type" 80.Ft struct mbuf * 81.Fn m_gethdr "int how" "int type" 82.Ft struct mbuf * 83.Fn m_free "struct mbuf *mbuf" 84.Ft void 85.Fn m_freem "struct mbuf *mbuf" 86.\" 87.Ss Mbuf utility functions 88.Ft void 89.Fn m_adj "struct mbuf *mbuf" "int len" 90.Ft void 91.Fn m_align "struct mbuf *mbuf" "int len" 92.Ft int 93.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 94.Ft struct mbuf * 95.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 96.Ft struct mbuf * 97.Fn m_copyup "struct mbuf *mbuf" 
"int len" "int dstoff" 98.Ft struct mbuf * 99.Fn m_pullup "struct mbuf *mbuf" "int len" 100.Ft struct mbuf * 101.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp" 102.Ft struct mbuf * 103.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 104.Ft struct mbuf * 105.Fn m_copypacket "struct mbuf *mbuf" "int how" 106.Ft struct mbuf * 107.Fn m_dup "struct mbuf *mbuf" "int how" 108.Ft void 109.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 110.Ft void 111.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 112.Ft struct mbuf * 113.Fo m_devget 114.Fa "char *buf" 115.Fa "int len" 116.Fa "int offset" 117.Fa "struct ifnet *ifp" 118.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 119.Fc 120.Ft void 121.Fn m_cat "struct mbuf *m" "struct mbuf *n" 122.Ft u_int 123.Fn m_fixhdr "struct mbuf *mbuf" 124.Ft void 125.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" 126.Ft void 127.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 128.Ft u_int 129.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 130.Ft struct mbuf * 131.Fn m_split "struct mbuf *mbuf" "int len" "int how" 132.Ft int 133.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 134.Ft struct mbuf * 135.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 136.Ft struct mbuf * 137.Fn m_defrag "struct mbuf *m0" "int how" 138.Ft struct mbuf * 139.Fn m_unshare "struct mbuf *m0" "int how" 140.\" 141.Sh DESCRIPTION 142An 143.Vt mbuf 144is a basic unit of memory management in the kernel IPC subsystem. 145Network packets and socket buffers are stored in 146.Vt mbufs . 147A network packet may span multiple 148.Vt mbufs 149arranged into a 150.Vt mbuf chain 151(linked list), 152which allows adding or trimming 153network headers with little overhead. 
154.Pp 155While a developer should not bother with 156.Vt mbuf 157internals without serious 158reason in order to avoid incompatibilities with future changes, it 159is useful to understand the general structure of an 160.Vt mbuf . 161.Pp 162An 163.Vt mbuf 164consists of a variable-sized header and a small internal 165buffer for data. 166The total size of an 167.Vt mbuf , 168.Dv MSIZE , 169is a constant defined in 170.In sys/param.h . 171The 172.Vt mbuf 173header includes: 174.Bl -tag -width "m_nextpkt" -offset indent 175.It Va m_next 176.Pq Vt struct mbuf * 177A pointer to the next 178.Vt mbuf 179in the 180.Vt mbuf chain . 181.It Va m_nextpkt 182.Pq Vt struct mbuf * 183A pointer to the next 184.Vt mbuf chain 185in the queue. 186.It Va m_data 187.Pq Vt caddr_t 188A pointer to data attached to this 189.Vt mbuf . 190.It Va m_len 191.Pq Vt int 192The length of the data. 193.It Va m_type 194.Pq Vt short 195The type of the data. 196.It Va m_flags 197.Pq Vt int 198The 199.Vt mbuf 200flags. 201.El 202.Pp 203The 204.Vt mbuf 205flag bits are defined as follows: 206.Bd -literal 207/* mbuf flags */ 208#define M_EXT 0x0001 /* has associated external storage */ 209#define M_PKTHDR 0x0002 /* start of record */ 210#define M_EOR 0x0004 /* end of record */ 211#define M_RDONLY 0x0008 /* associated data marked read-only */ 212#define M_PROTO1 0x0010 /* protocol-specific */ 213#define M_PROTO2 0x0020 /* protocol-specific */ 214#define M_PROTO3 0x0040 /* protocol-specific */ 215#define M_PROTO4 0x0080 /* protocol-specific */ 216#define M_PROTO5 0x0100 /* protocol-specific */ 217#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 218#define M_FREELIST 0x8000 /* mbuf is on the free list */ 219 220/* mbuf pkthdr flags (also stored in m_flags) */ 221#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 222#define M_MCAST 0x0400 /* send/received as link-level multicast */ 223#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 224#define 
M_FIRSTFRAG 0x1000 /* packet is first fragment */ 225#define M_LASTFRAG 0x2000 /* packet is last fragment */ 226.Ed 227.Pp 228The available 229.Vt mbuf 230types are defined as follows: 231.Bd -literal 232/* mbuf types */ 233#define MT_DATA 1 /* dynamic (data) allocation */ 234#define MT_HEADER MT_DATA /* packet header */ 235#define MT_SONAME 8 /* socket name */ 236#define MT_CONTROL 14 /* extra-data protocol message */ 237#define MT_OOBDATA 15 /* expedited data */ 238.Ed 239.Pp 240The available external buffer types are defined as follows: 241.Bd -literal 242/* external buffer types */ 243#define EXT_CLUSTER 1 /* mbuf cluster */ 244#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ 245#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */ 246#define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ 247#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ 248#define EXT_PACKET 6 /* mbuf+cluster from packet zone */ 249#define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ 250#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ 251#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ 252#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ 253#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ 254.Ed 255.Pp 256If the 257.Dv M_PKTHDR 258flag is set, a 259.Vt struct pkthdr Va m_pkthdr 260is added to the 261.Vt mbuf 262header. 263It contains a pointer to the interface 264the packet has been received from 265.Pq Vt struct ifnet Va *rcvif , 266and the total packet length 267.Pq Vt int Va len . 268Optionally, it may also contain an attached list of packet tags 269.Pq Vt "struct m_tag" . 270See 271.Xr mbuf_tags 9 272for details. 273Fields used in offloading checksum calculation to the hardware are kept in 274.Va m_pkthdr 275as well. 276See 277.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 278for details. 279.Pp 280If small enough, data is stored in the internal data buffer of an 281.Vt mbuf . 
282If the data is sufficiently large, another 283.Vt mbuf 284may be added to the 285.Vt mbuf chain , 286or external storage may be associated with the 287.Vt mbuf . 288.Dv MHLEN 289bytes of data can fit into an 290.Vt mbuf 291with the 292.Dv M_PKTHDR 293flag set, 294.Dv MLEN 295bytes can otherwise. 296.Pp 297If external storage is being associated with an 298.Vt mbuf , 299the 300.Va m_ext 301header is added at the cost of losing the internal data buffer. 302It includes a pointer to external storage, the size of the storage, 303a pointer to a function used for freeing the storage, 304a pointer to an optional argument that can be passed to the function, 305and a pointer to a reference counter. 306An 307.Vt mbuf 308using external storage has the 309.Dv M_EXT 310flag set. 311.Pp 312The system supplies a macro for allocating the desired external storage 313buffer, 314.Dv MEXTADD . 315.Pp 316The allocation and management of the reference counter is handled by the 317subsystem. 318.Pp 319The system also supplies a default type of external storage buffer called an 320.Vt mbuf cluster . 321.Vt Mbuf clusters 322can be allocated and configured with the use of the 323.Dv MCLGET 324macro. 325Each 326.Vt mbuf cluster 327is 328.Dv MCLBYTES 329in size, where MCLBYTES is a machine-dependent constant. 330The system defines an advisory macro 331.Dv MINCLSIZE , 332which is the smallest amount of data to put into an 333.Vt mbuf cluster . 334It is equal to the sum of 335.Dv MLEN 336and 337.Dv MHLEN . 338It is typically preferable to store data into the data region of an 339.Vt mbuf , 340if size permits, as opposed to allocating a separate 341.Vt mbuf cluster 342to hold the same data. 343.\" 344.Ss Macros and Functions 345There are numerous predefined macros and functions that provide the 346developer with common utilities. 347.\" 348.Bl -ohang -offset indent 349.It Fn mtod mbuf type 350Convert an 351.Fa mbuf 352pointer to a data pointer. 
353The macro expands to the data pointer cast to the pointer of the specified 354.Fa type . 355.Sy Note : 356It is advisable to ensure that there is enough contiguous data in 357.Fa mbuf . 358See 359.Fn m_pullup 360for details. 361.It Fn MGET mbuf how type 362Allocate an 363.Vt mbuf 364and initialize it to contain internal data. 365.Fa mbuf 366will point to the allocated 367.Vt mbuf 368on success, or be set to 369.Dv NULL 370on failure. 371The 372.Fa how 373argument is to be set to 374.Dv M_WAIT 375or 376.Dv M_DONTWAIT . 377It specifies whether the caller is willing to block if necessary. 378A number of other functions and macros related to 379.Vt mbufs 380have the same argument because they may 381at some point need to allocate new 382.Vt mbufs . 383.Pp 384Programmers should be careful not to confuse the 385.Vt mbuf 386allocation flag 387.Dv M_DONTWAIT 388with the 389.Xr malloc 9 390allocation flag, 391.Dv M_NOWAIT . 392They are not the same. 393.It Fn MGETHDR mbuf how type 394Allocate an 395.Vt mbuf 396and initialize it to contain a packet header 397and internal data. 398See 399.Fn MGET 400for details. 401.It Fn MEXTADD mbuf buf size free opt_arg1 opt_arg2 flags type 402Associate externally managed data with 403.Fa mbuf . 404Any internal data contained in the mbuf will be discarded, and the 405.Dv M_EXT No flag will be set. 406The 407.Fa buf 408and 409.Fa size 410arguments are the address and length, respectively, of the data. 411The 412.Fa free 413argument points to a function which will be called to free the data 414when the mbuf is freed; it is only used if 415.Fa type 416is 417.Dv EXT_EXTREF . 418The 419.Fa opt_arg1 420and 421.Fa opt_arg2 422arguments will be passed unmodified to 423.Fa free . 424The 425.Fa flags 426argument specifies additional 427.Vt mbuf 428flags; it is not necessary to specify 429.Dv M_EXT .
430Finally, the 431.Fa type 432argument specifies the type of external data, which controls how it 433will be disposed of when the 434.Vt mbuf 435is freed. 436In most cases, the correct value is 437.Dv EXT_EXTREF . 438.It Fn MCLGET mbuf how 439Allocate and attach an 440.Vt mbuf cluster 441to 442.Fa mbuf . 443If the macro fails, the 444.Dv M_EXT 445flag will not be set in 446.Fa mbuf . 447.It Fn M_ALIGN mbuf len 448Set the pointer 449.Fa mbuf->m_data 450to place an object of the size 451.Fa len 452at the end of the internal data area of 453.Fa mbuf , 454long word aligned. 455Applicable only if 456.Fa mbuf 457is newly allocated with 458.Fn MGET 459or 460.Fn m_get . 461.It Fn MH_ALIGN mbuf len 462Serves the same purpose as 463.Fn M_ALIGN 464does, but only for 465.Fa mbuf 466newly allocated with 467.Fn MGETHDR 468or 469.Fn m_gethdr , 470or initialized by 471.Fn m_dup_pkthdr 472or 473.Fn m_move_pkthdr . 474.It Fn m_align mbuf len 475Serves the same purpose as 476.Fn M_ALIGN 477but handles any type of mbuf. 478.It Fn M_LEADINGSPACE mbuf 479Returns the number of bytes available before the beginning 480of data in 481.Fa mbuf . 482.It Fn M_TRAILINGSPACE mbuf 483Returns the number of bytes available after the end of data in 484.Fa mbuf . 485.It Fn M_PREPEND mbuf len how 486This macro operates on an 487.Vt mbuf chain . 488It is an optimized wrapper for 489.Fn m_prepend 490that can make use of possible empty space before data 491(e.g.\& left after trimming of a link-layer header). 492The new 493.Vt mbuf chain 494pointer or 495.Dv NULL 496is in 497.Fa mbuf 498after the call. 499.It Fn M_MOVE_PKTHDR to from 500Using this macro is equivalent to calling 501.Fn m_move_pkthdr to from . 502.It Fn M_WRITABLE mbuf 503This macro will evaluate true if 504.Fa mbuf 505is not marked 506.Dv M_RDONLY 507and if either 508.Fa mbuf 509does not contain external storage or, 510if it does, 511then if the reference count of the storage is not greater than 1.
512The 513.Dv M_RDONLY 514flag can be set in 515.Fa mbuf->m_flags . 516This can be achieved during setup of the external storage, 517by passing the 518.Dv M_RDONLY 519bit as a 520.Fa flags 521argument to the 522.Fn MEXTADD 523macro, or can be directly set in individual 524.Vt mbufs . 525.It Fn MCHTYPE mbuf type 526Change the type of 527.Fa mbuf 528to 529.Fa type . 530This is a relatively expensive operation and should be avoided. 531.El 532.Pp 533The functions are: 534.Bl -ohang -offset indent 535.It Fn m_get how type 536A function version of 537.Fn MGET 538for non-critical paths. 539.It Fn m_getm orig len how type 540Allocate 541.Fa len 542bytes worth of 543.Vt mbufs 544and 545.Vt mbuf clusters 546if necessary and append the resulting allocated 547.Vt mbuf chain 548to the 549.Vt mbuf chain 550.Fa orig , 551if it is 552.No non- Ns Dv NULL . 553If the allocation fails at any point, 554free whatever was allocated and return 555.Dv NULL . 556If 557.Fa orig 558is 559.No non- Ns Dv NULL , 560it will not be freed. 561It is possible to use 562.Fn m_getm 563to either append 564.Fa len 565bytes to an existing 566.Vt mbuf 567or 568.Vt mbuf chain 569(for example, one which may be sitting in a pre-allocated ring) 570or to simply perform an all-or-nothing 571.Vt mbuf 572and 573.Vt mbuf cluster 574allocation. 575.It Fn m_gethdr how type 576A function version of 577.Fn MGETHDR 578for non-critical paths. 579.It Fn m_getcl how type flags 580Fetch an 581.Vt mbuf 582with a 583.Vt mbuf cluster 584attached to it. 585If one of the allocations fails, the entire allocation fails. 586This routine is the preferred way of fetching both the 587.Vt mbuf 588and 589.Vt mbuf cluster 590together, as it avoids having to unlock/relock between allocations. 591Returns 592.Dv NULL 593on failure. 594.It Fn m_getclr how type 595Allocate an 596.Vt mbuf 597and zero out the data region. 598.It Fn m_free mbuf 599Frees 600.Vt mbuf . 601Returns 602.Va m_next 603of the freed 604.Vt mbuf . 
605.El 606.Pp 607The functions below operate on 608.Vt mbuf chains . 609.Bl -ohang -offset indent 610.It Fn m_freem mbuf 611Free an entire 612.Vt mbuf chain , 613including any external storage. 614.\" 615.It Fn m_adj mbuf len 616Trim 617.Fa len 618bytes from the head of an 619.Vt mbuf chain 620if 621.Fa len 622is positive, from the tail otherwise. 623.\" 624.It Fn m_append mbuf len cp 625Append 626.Fa len 627bytes of data 628.Fa cp 629to the 630.Vt mbuf chain . 631Extend the mbuf chain if the new data does not fit in 632existing space. 633.\" 634.It Fn m_prepend mbuf len how 635Allocate a new 636.Vt mbuf 637and prepend it to the 638.Vt mbuf chain , 639handling 640.Dv M_PKTHDR 641properly. 642.Sy Note : 643It does not allocate any 644.Vt mbuf clusters , 645so 646.Fa len 647must be less than 648.Dv MLEN 649or 650.Dv MHLEN , 651depending on the 652.Dv M_PKTHDR 653flag setting. 654.\" 655.It Fn m_copyup mbuf len dstoff 656Similar to 657.Fn m_pullup 658but copies 659.Fa len 660bytes of data into a new mbuf at 661.Fa dstoff 662bytes into the mbuf. 663The 664.Fa dstoff 665argument aligns the data and leaves room for a link layer header. 666Returns the new 667.Vt mbuf chain 668on success, 669and frees the 670.Vt mbuf chain 671and returns 672.Dv NULL 673on failure. 674.Sy Note : 675The function does not allocate 676.Vt mbuf clusters , 677so 678.Fa len + dstoff 679must be less than 680.Dv MHLEN . 681.\" 682.It Fn m_pullup mbuf len 683Arrange that the first 684.Fa len 685bytes of an 686.Vt mbuf chain 687are contiguous and lie in the data area of 688.Fa mbuf , 689so they are accessible with 690.Fn mtod mbuf type . 691It is important to remember that this may involve 692reallocating some mbufs and moving data so all pointers 693referencing data within the old mbuf chain 694must be recalculated or made invalid. 695Return the new 696.Vt mbuf chain 697on success, 698.Dv NULL 699on failure 700(the 701.Vt mbuf chain 702is freed in this case).
703.Sy Note : 704It does not allocate any 705.Vt mbuf clusters , 706so 707.Fa len 708must be less than 709.Dv MHLEN . 710.\" 711.It Fn m_pulldown mbuf offset len offsetp 712Arrange that 713.Fa len 714bytes between 715.Fa offset 716and 717.Fa offset + len 718in the 719.Vt mbuf chain 720are contiguous and lie in the data area of 721.Fa mbuf , 722so they are accessible with 723.Fn mtod mbuf type . 724.Fa len No must be smaller than, or equal to, the size of an 725.Vt mbuf cluster . 726Return a pointer to an intermediate 727.Vt mbuf 728in the chain containing the requested region; 729the offset in the data region of the 730.Vt mbuf chain 731to the data contained in the returned mbuf is stored in 732.Fa *offsetp . 733If 734.Fa offsetp 735is NULL, the region may be accessed using 736.Fn mtod mbuf type . 737If 738.Fa offsetp 739is non-NULL, the region may be accessed using 740.Fn mtod mbuf uint8_t + *offsetp . 741The region of the mbuf chain between its beginning and 742.Fa offset 743is not modified, therefore it is safe to hold pointers to data within 744this region before calling 745.Fn m_pulldown . 746.\" 747.It Fn m_copym mbuf offset len how 748Make a copy of an 749.Vt mbuf chain 750starting 751.Fa offset 752bytes from the beginning, continuing for 753.Fa len 754bytes. 755If 756.Fa len 757is 758.Dv M_COPYALL , 759copy to the end of the 760.Vt mbuf chain . 761.Sy Note : 762The copy is read-only, because the 763.Vt mbuf clusters 764are not copied, only their reference counts are incremented. 765.\" 766.It Fn m_copypacket mbuf how 767Copy an entire packet including header, which must be present. 768This is an optimized version of the common case 769.Fn m_copym mbuf 0 M_COPYALL how . 770.Sy Note : 771The copy is read-only, because the 772.Vt mbuf clusters 773are not copied, only their reference counts are incremented. 774.\" 775.It Fn m_dup mbuf how 776Copy a packet header 777.Vt mbuf chain 778into a completely new 779.Vt mbuf chain , 780including copying any 781.Vt mbuf clusters .
782Use this instead of 783.Fn m_copypacket 784when you need a writable copy of an 785.Vt mbuf chain . 786.\" 787.It Fn m_copydata mbuf offset len buf 788Copy data from an 789.Vt mbuf chain 790starting 791.Fa offset 792bytes from the beginning, continuing for 793.Fa len 794bytes, into the indicated buffer 795.Fa buf . 796.\" 797.It Fn m_copyback mbuf offset len buf 798Copy 799.Fa len 800bytes from the buffer 801.Fa buf 802back into the indicated 803.Vt mbuf chain , 804starting at 805.Fa offset 806bytes from the beginning of the 807.Vt mbuf chain , 808extending the 809.Vt mbuf chain 810if necessary. 811.Sy Note : 812It does not allocate any 813.Vt mbuf clusters , 814just adds 815.Vt mbufs 816to the 817.Vt mbuf chain . 818It is safe to set 819.Fa offset 820beyond the current 821.Vt mbuf chain 822end: zeroed 823.Vt mbufs 824will be allocated to fill the space. 825.\" 826.It Fn m_length mbuf last 827Return the length of the 828.Vt mbuf chain , 829and optionally a pointer to the last 830.Vt mbuf . 831.\" 832.It Fn m_dup_pkthdr to from how 833Upon the function's completion, the 834.Vt mbuf 835.Fa to 836will contain an identical copy of 837.Fa from->m_pkthdr 838and the per-packet attributes found in the 839.Vt mbuf chain 840.Fa from . 841The 842.Vt mbuf 843.Fa from 844must have the flag 845.Dv M_PKTHDR 846initially set, and 847.Fa to 848must be empty on entry. 849.\" 850.It Fn m_move_pkthdr to from 851Move 852.Va m_pkthdr 853and the per-packet attributes from the 854.Vt mbuf chain 855.Fa from 856to the 857.Vt mbuf 858.Fa to . 859The 860.Vt mbuf 861.Fa from 862must have the flag 863.Dv M_PKTHDR 864initially set, and 865.Fa to 866must be empty on entry. 867Upon the function's completion, 868.Fa from 869will have the flag 870.Dv M_PKTHDR 871and the per-packet attributes cleared. 872.\" 873.It Fn m_fixhdr mbuf 874Set the packet-header length to the length of the 875.Vt mbuf chain .
876.\" 877.It Fn m_devget buf len offset ifp copy 878Copy data from a device local memory pointed to by 879.Fa buf 880to an 881.Vt mbuf chain . 882The copy is done using a specified copy routine 883.Fa copy , 884or 885.Fn bcopy 886if 887.Fa copy 888is 889.Dv NULL . 890.\" 891.It Fn m_cat m n 892Concatenate 893.Fa n 894to 895.Fa m . 896Both 897.Vt mbuf chains 898must be of the same type. 899.Fa n 900is still valid after the function returns. 901.Sy Note : 902It does not handle 903.Dv M_PKTHDR 904and friends. 905.\" 906.It Fn m_split mbuf len how 907Partition an 908.Vt mbuf chain 909in two pieces, returning the tail: 910all but the first 911.Fa len 912bytes. 913In case of failure, it returns 914.Dv NULL 915and attempts to restore the 916.Vt mbuf chain 917to its original state. 918.\" 919.It Fn m_apply mbuf off len f arg 920Apply a function to an 921.Vt mbuf chain , 922at offset 923.Fa off , 924for length 925.Fa len 926bytes. 927Typically used to avoid calls to 928.Fn m_pullup 929which would otherwise be unnecessary or undesirable. 930.Fa arg 931is a convenience argument which is passed to the callback function 932.Fa f . 933.Pp 934Each time 935.Fn f 936is called, it will be passed 937.Fa arg , 938a pointer to the 939.Fa data 940in the current mbuf, and the length 941.Fa len 942of the data in this mbuf to which the function should be applied. 943.Pp 944The function should return zero to indicate success; 945otherwise, if an error is indicated, then 946.Fn m_apply 947will return the error and stop iterating through the 948.Vt mbuf chain . 949.\" 950.It Fn m_getptr mbuf loc off 951Return a pointer to the mbuf containing the data located at 952.Fa loc 953bytes from the beginning of the 954.Vt mbuf chain . 955The corresponding offset into the mbuf will be stored in 956.Fa *off . 957.It Fn m_defrag m0 how 958Defragment an mbuf chain, returning the shortest possible 959chain of mbufs and clusters.
960If allocation fails and this can not be completed, 961.Dv NULL 962will be returned and the original chain will be unchanged. 963Upon success, the original chain will be freed and the new 964chain will be returned. 965.Fa how 966should be either 967.Dv M_WAIT 968or 969.Dv M_DONTWAIT , 970depending on the caller's preference. 971.Pp 972This function is especially useful in network drivers, where 973certain long mbuf chains must be shortened before being added 974to TX descriptor lists. 975.It Fn m_unshare m0 how 976Create a version of the specified mbuf chain whose 977contents can be safely modified without affecting other users. 978If allocation fails and this operation can not be completed, 979.Dv NULL 980will be returned. 981The original mbuf chain is always reclaimed and the reference 982count of any shared mbuf clusters is decremented. 983.Fa how 984should be either 985.Dv M_WAIT 986or 987.Dv M_DONTWAIT , 988depending on the caller's preference. 989As a side-effect of this process the returned 990mbuf chain may be compacted. 991.Pp 992This function is especially useful in the transmit path of 993network code, when data must be encrypted or otherwise 994altered prior to transmission. 995.El 996.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 997This section currently applies to TCP/IP only. 998In order to save the host CPU resources, computing checksums is 999offloaded to the network interface hardware if possible. 1000The 1001.Va m_pkthdr 1002member of the leading 1003.Vt mbuf 1004of a packet contains two fields used for that purpose, 1005.Vt int Va csum_flags 1006and 1007.Vt int Va csum_data . 1008The meaning of those fields depends on the direction a packet flows in, 1009and on whether the packet is fragmented. 1010Henceforth, 1011.Va csum_flags 1012or 1013.Va csum_data 1014of a packet 1015will denote the corresponding field of the 1016.Va m_pkthdr 1017member of the leading 1018.Vt mbuf 1019in the 1020.Vt mbuf chain 1021containing the packet. 
1022.Pp 1023On output, checksum offloading is attempted after the outgoing 1024interface has been determined for a packet. 1025The interface-specific field 1026.Va ifnet.if_data.ifi_hwassist 1027(see 1028.Xr ifnet 9 ) 1029is consulted for the capabilities of the interface to assist in 1030computing checksums. 1031The 1032.Va csum_flags 1033field of the packet header is set to indicate which actions the interface 1034is supposed to perform on it. 1035The actions unsupported by the network interface are done in the 1036software prior to passing the packet down to the interface driver; 1037such actions will never be requested through 1038.Va csum_flags . 1039.Pp 1040The flags demanding a particular action from an interface are as follows: 1041.Bl -tag -width ".Dv CSUM_TCP" -offset indent 1042.It Dv CSUM_IP 1043The IP header checksum is to be computed and stored in the 1044corresponding field of the packet. 1045The hardware is expected to know the format of an IP header 1046to determine the offset of the IP checksum field. 1047.It Dv CSUM_TCP 1048The TCP checksum is to be computed. 1049(See below.) 1050.It Dv CSUM_UDP 1051The UDP checksum is to be computed. 1052(See below.) 1053.El 1054.Pp 1055Should a TCP or UDP checksum be offloaded to the hardware, 1056the field 1057.Va csum_data 1058will contain the byte offset of the checksum field relative to the 1059end of the IP header. 1060In this case, the checksum field will be initially 1061set by the TCP/IP module to the checksum of the pseudo header 1062defined by the TCP and UDP specifications. 
1063.Pp 1064For outbound packets which have been fragmented 1065by the host CPU, the following will also be true, 1066regardless of the checksum flag settings: 1067.Bl -bullet -offset indent 1068.It 1069all fragments will have the flag 1070.Dv M_FRAG 1071set in their 1072.Va m_flags 1073field; 1074.It 1075the first and the last fragments in the chain will have 1076.Dv M_FIRSTFRAG 1077or 1078.Dv M_LASTFRAG 1079set in their 1080.Va m_flags , 1081correspondingly; 1082.It 1083the first fragment in the chain will have the total number 1084of fragments contained in its 1085.Va csum_data 1086field. 1087.El 1088.Pp 1089The last rule for fragmented packets takes precedence over the one 1090for a TCP or UDP checksum. 1091Nevertheless, offloading a TCP or UDP checksum is possible for a 1092fragmented packet if the flag 1093.Dv CSUM_IP_FRAGS 1094is set in the field 1095.Va ifnet.if_data.ifi_hwassist 1096associated with the network interface. 1097However, in this case the interface is expected to figure out 1098the location of the checksum field within the sequence of fragments 1099by itself because 1100.Va csum_data 1101contains a fragment count instead of a checksum offset value. 1102.Pp 1103On input, an interface indicates the actions it has performed 1104on a packet by setting one or more of the following flags in 1105.Va csum_flags 1106associated with the packet: 1107.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1108.It Dv CSUM_IP_CHECKED 1109The IP header checksum has been computed. 1110.It Dv CSUM_IP_VALID 1111The IP header has a valid checksum. 1112This flag can appear only in combination with 1113.Dv CSUM_IP_CHECKED . 1114.It Dv CSUM_DATA_VALID 1115The checksum of the data portion of the IP packet has been computed 1116and stored in the field 1117.Va csum_data 1118in network byte order. 
1119.It Dv CSUM_PSEUDO_HDR 1120Can be set only along with 1121.Dv CSUM_DATA_VALID 1122to indicate that the IP data checksum found in 1123.Va csum_data 1124allows for the pseudo header defined by the TCP and UDP specifications. 1125Otherwise the checksum of the pseudo header must be calculated by 1126the host CPU and added to 1127.Va csum_data 1128to obtain the final checksum to be used for TCP or UDP validation purposes. 1129.El 1130.Pp 1131If a particular network interface just indicates success or 1132failure of TCP or UDP checksum validation without returning 1133the exact value of the checksum to the host CPU, its driver can mark 1134.Dv CSUM_DATA_VALID 1135and 1136.Dv CSUM_PSEUDO_HDR 1137in 1138.Va csum_flags , 1139and set 1140.Va csum_data 1141to 1142.Li 0xFFFF 1143hexadecimal to indicate a valid checksum. 1144It is a peculiarity of the algorithm used that the Internet checksum 1145calculated over any valid packet will be 1146.Li 0xFFFF 1147as long as the original checksum field is included. 1148.Pp 1149For inbound packets which are IP fragments, all 1150.Va csum_data 1151fields will be summed during reassembly to obtain the final checksum 1152value passed to an upper layer in the 1153.Va csum_data 1154field of the reassembled packet. 1155The 1156.Va csum_flags 1157fields of all fragments will be consolidated using logical AND 1158to obtain the final value for 1159.Va csum_flags . 1160Thus, in order to successfully 1161offload checksum computation for fragmented data, 1162all fragments should have the same value of 1163.Va csum_flags . 1164.Sh STRESS TESTING 1165When running a kernel compiled with the option 1166.Dv MBUF_STRESS_TEST , 1167the following 1168.Xr sysctl 8 Ns 1169-controlled options may be used to create 1170various failure/extreme cases for testing of network drivers 1171and other parts of the kernel that rely on 1172.Vt mbufs . 
1173.Bl -tag -width ident 1174.It Va net.inet.ip.mbuf_frag_size 1175Causes 1176.Fn ip_output 1177to fragment outgoing 1178.Vt mbuf chains 1179into fragments of the specified size. 1180Setting this variable to 1 is an excellent way to 1181test the long 1182.Vt mbuf chain 1183handling ability of network drivers. 1184.It Va kern.ipc.m_defragrandomfailures 1185Causes the function 1186.Fn m_defrag 1187to randomly fail, returning 1188.Dv NULL . 1189Any piece of code which uses 1190.Fn m_defrag 1191should be tested with this feature. 1192.El 1193.Sh RETURN VALUES 1194See above. 1195.Sh SEE ALSO 1196.Xr ifnet 9 , 1197.Xr mbuf_tags 9 1198.Sh HISTORY 1199.\" Please correct me if I'm wrong 1200.Vt Mbufs 1201appeared in an early version of 1202.Bx . 1203Besides being used for network packets, they were used 1204to store various dynamic structures, such as routing table 1205entries, interface addresses, protocol control blocks, etc. 1206In more recent 1207.Fx 1208use of 1209.Vt mbufs 1210is almost entirely limited to packet storage, with 1211.Xr uma 9 1212zones being used directly to store other network-related memory. 1213.Pp 1214Historically, the 1215.Vt mbuf 1216allocator has been a special-purpose memory allocator able to run in 1217interrupt contexts and allocating from a special kernel address space map. 1218As of 1219.Fx 5.3 , 1220the 1221.Vt mbuf 1222allocator is a wrapper around 1223.Xr uma 9 , 1224allowing caching of 1225.Vt mbufs , 1226clusters, and 1227.Vt mbuf 1228+ cluster pairs in per-CPU caches, as well as bringing other benefits of 1229slab allocation. 1230.Sh AUTHORS 1231The original 1232.Nm 1233manual page was written by Yar Tikhiy. 1234The 1235.Xr uma 9 1236.Vt mbuf 1237allocator was written by Bosko Milekic. 1238