1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd August 8, 2021 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Ft int 44.Fn MCLGET "struct mbuf *mbuf" "int how" 45.Fo MEXTADD 46.Fa "struct mbuf *mbuf" 47.Fa "char *buf" 48.Fa "u_int size" 49.Fa "void (*free)(struct mbuf *)" 50.Fa "void *opt_arg1" 51.Fa "void *opt_arg2" 52.Fa "int flags" 53.Fa "int type" 54.Fc 55.\" 56.Ss Mbuf utility macros 57.Fn mtod "struct mbuf *mbuf" "type" 58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 60.Ft int 61.Fn M_LEADINGSPACE "struct mbuf *mbuf" 62.Ft int 63.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 66.Fn MCHTYPE "struct mbuf *mbuf" "short type" 67.Ft int 68.Fn M_WRITABLE "struct mbuf *mbuf" 69.\" 70.Ss Mbuf allocation functions 71.Ft struct mbuf * 72.Fn m_get "int how" "short type" 73.Ft struct mbuf * 74.Fn m_get2 "int size" "int how" "short type" "int flags" 75.Ft struct mbuf * 76.Fn m_get3 "int size" "int how" "short type" "int flags" 77.Ft struct mbuf * 78.Fn m_getm "struct mbuf *orig" "int len" "int how" "short type" 79.Ft struct mbuf * 80.Fn m_getjcl "int how" "short type" "int flags" "int size" 81.Ft struct mbuf * 82.Fn m_getcl "int how" "short type" "int flags" 83.Ft struct mbuf * 84.Fn m_gethdr "int how" "short type" 85.Ft struct mbuf * 86.Fn m_free "struct mbuf *mbuf" 87.Ft void 88.Fn m_freem "struct mbuf *mbuf" 89.\" 90.Ss Mbuf utility functions 91.Ft void 92.Fn m_adj "struct mbuf *mbuf" "int len" 93.Ft void 94.Fn m_align "struct mbuf *mbuf" "int len" 95.Ft int 96.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 97.Ft struct mbuf * 98.Fn m_prepend "struct 
mbuf *mbuf" "int len" "int how" 99.Ft struct mbuf * 100.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 101.Ft struct mbuf * 102.Fn m_pullup "struct mbuf *mbuf" "int len" 103.Ft struct mbuf * 104.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp" 105.Ft struct mbuf * 106.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 107.Ft struct mbuf * 108.Fn m_copypacket "struct mbuf *mbuf" "int how" 109.Ft struct mbuf * 110.Fn m_dup "const struct mbuf *mbuf" "int how" 111.Ft void 112.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 113.Ft void 114.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 115.Ft struct mbuf * 116.Fo m_devget 117.Fa "char *buf" 118.Fa "int len" 119.Fa "int offset" 120.Fa "struct ifnet *ifp" 121.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 122.Fc 123.Ft void 124.Fn m_cat "struct mbuf *m" "struct mbuf *n" 125.Ft void 126.Fn m_catpkt "struct mbuf *m" "struct mbuf *n" 127.Ft u_int 128.Fn m_fixhdr "struct mbuf *mbuf" 129.Ft int 130.Fn m_dup_pkthdr "struct mbuf *to" "const struct mbuf *from" "int how" 131.Ft void 132.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 133.Ft u_int 134.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 135.Ft struct mbuf * 136.Fn m_split "struct mbuf *mbuf" "int len" "int how" 137.Ft int 138.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 139.Ft struct mbuf * 140.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 141.Ft struct mbuf * 142.Fn m_defrag "struct mbuf *m0" "int how" 143.Ft struct mbuf * 144.Fn m_collapse "struct mbuf *m0" "int how" "int maxfrags" 145.Ft struct mbuf * 146.Fn m_unshare "struct mbuf *m0" "int how" 147.\" 148.Sh DESCRIPTION 149An 150.Vt mbuf 151is a basic unit of memory management in the kernel IPC subsystem. 152Network packets and socket buffers are stored in 153.Vt mbufs . 
154A network packet may span multiple 155.Vt mbufs 156arranged into a 157.Vt mbuf chain 158(linked list), 159which allows adding or trimming 160network headers with little overhead. 161.Pp 162While a developer should not bother with 163.Vt mbuf 164internals without serious 165reason in order to avoid incompatibilities with future changes, it 166is useful to understand the general structure of an 167.Vt mbuf . 168.Pp 169An 170.Vt mbuf 171consists of a variable-sized header and a small internal 172buffer for data. 173The total size of an 174.Vt mbuf , 175.Dv MSIZE , 176is a constant defined in 177.In sys/param.h . 178The 179.Vt mbuf 180header includes: 181.Bl -tag -width "m_nextpkt" -offset indent 182.It Va m_next 183.Pq Vt struct mbuf * 184A pointer to the next 185.Vt mbuf 186in the 187.Vt mbuf chain . 188.It Va m_nextpkt 189.Pq Vt struct mbuf * 190A pointer to the next 191.Vt mbuf chain 192in the queue. 193.It Va m_data 194.Pq Vt caddr_t 195A pointer to data attached to this 196.Vt mbuf . 197.It Va m_len 198.Pq Vt int 199The length of the data. 200.It Va m_type 201.Pq Vt short 202The type of the data. 203.It Va m_flags 204.Pq Vt int 205The 206.Vt mbuf 207flags. 
208.El 209.Pp 210The 211.Vt mbuf 212flag bits are defined as follows: 213.Bd -literal 214#define M_EXT 0x00000001 /* has associated external storage */ 215#define M_PKTHDR 0x00000002 /* start of record */ 216#define M_EOR 0x00000004 /* end of record */ 217#define M_RDONLY 0x00000008 /* associated data marked read-only */ 218#define M_BCAST 0x00000010 /* send/received as link-level broadcast */ 219#define M_MCAST 0x00000020 /* send/received as link-level multicast */ 220#define M_PROMISC 0x00000040 /* packet was not for us */ 221#define M_VLANTAG 0x00000080 /* ether_vtag is valid */ 222#define M_EXTPG 0x00000100 /* has array of unmapped pages and TLS */ 223#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ 224#define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */ 225#define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically 226 hw-stamped on port (useful for IEEE 1588 227 and 802.1AS) */ 228 229#define M_PROTO1 0x00001000 /* protocol-specific */ 230#define M_PROTO2 0x00002000 /* protocol-specific */ 231#define M_PROTO3 0x00004000 /* protocol-specific */ 232#define M_PROTO4 0x00008000 /* protocol-specific */ 233#define M_PROTO5 0x00010000 /* protocol-specific */ 234#define M_PROTO6 0x00020000 /* protocol-specific */ 235#define M_PROTO7 0x00040000 /* protocol-specific */ 236#define M_PROTO8 0x00080000 /* protocol-specific */ 237#define M_PROTO9 0x00100000 /* protocol-specific */ 238#define M_PROTO10 0x00200000 /* protocol-specific */ 239#define M_PROTO11 0x00400000 /* protocol-specific */ 240#define M_PROTO12 0x00800000 /* protocol-specific */ 241.Ed 242.Pp 243The available 244.Vt mbuf 245types are defined as follows: 246.Bd -literal 247#define MT_DATA 1 /* dynamic (data) allocation */ 248#define MT_HEADER MT_DATA /* packet header */ 249 250#define MT_VENDOR1 4 /* for vendor-internal use */ 251#define MT_VENDOR2 5 /* for vendor-internal use */ 252#define MT_VENDOR3 6 /* for vendor-internal use */ 253#define MT_VENDOR4 7 /* for 
vendor-internal use */ 254 255#define MT_SONAME 8 /* socket name */ 256 257#define MT_EXP1 9 /* for experimental use */ 258#define MT_EXP2 10 /* for experimental use */ 259#define MT_EXP3 11 /* for experimental use */ 260#define MT_EXP4 12 /* for experimental use */ 261 262#define MT_CONTROL 14 /* extra-data protocol message */ 263#define MT_EXTCONTROL 15 /* control message with externalized contents */ 264#define MT_OOBDATA 16 /* expedited data */ 265.Ed 266.Pp 267The available external buffer types are defined as follows: 268.Bd -literal 269#define EXT_CLUSTER 1 /* mbuf cluster */ 270#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ 271#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */ 272#define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ 273#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ 274#define EXT_PACKET 6 /* mbuf+cluster from packet zone */ 275#define EXT_MBUF 7 /* external mbuf reference */ 276#define EXT_RXRING 8 /* data in NIC receive ring */ 277#define EXT_PGS 9 /* array of unmapped pages */ 278 279#define EXT_VENDOR1 224 /* for vendor-internal use */ 280#define EXT_VENDOR2 225 /* for vendor-internal use */ 281#define EXT_VENDOR3 226 /* for vendor-internal use */ 282#define EXT_VENDOR4 227 /* for vendor-internal use */ 283 284#define EXT_EXP1 244 /* for experimental use */ 285#define EXT_EXP2 245 /* for experimental use */ 286#define EXT_EXP3 246 /* for experimental use */ 287#define EXT_EXP4 247 /* for experimental use */ 288 289#define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ 290#define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ 291#define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ 292#define EXT_EXTREF 255 /* has externally maintained ref_cnt ptr */ 293.Ed 294.Pp 295If the 296.Dv M_PKTHDR 297flag is set, a 298.Vt struct pkthdr Va m_pkthdr 299is added to the 300.Vt mbuf 301header. 
302It contains a pointer to the interface 303the packet has been received from 304.Pq Vt struct ifnet Va *rcvif , 305and the total packet length 306.Pq Vt int Va len . 307Optionally, it may also contain an attached list of packet tags 308.Pq Vt "struct m_tag" . 309See 310.Xr mbuf_tags 9 311for details. 312Fields used in offloading checksum calculation to the hardware are kept in 313.Va m_pkthdr 314as well. 315See 316.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 317for details. 318.Pp 319If small enough, data is stored in the internal data buffer of an 320.Vt mbuf . 321If the data is sufficiently large, another 322.Vt mbuf 323may be added to the 324.Vt mbuf chain , 325or external storage may be associated with the 326.Vt mbuf . 327.Dv MHLEN 328bytes of data can fit into an 329.Vt mbuf 330with the 331.Dv M_PKTHDR 332flag set, 333.Dv MLEN 334bytes can otherwise. 335.Pp 336If external storage is being associated with an 337.Vt mbuf , 338the 339.Va m_ext 340header is added at the cost of losing the internal data buffer. 341It includes a pointer to external storage, the size of the storage, 342a pointer to a function used for freeing the storage, 343a pointer to an optional argument that can be passed to the function, 344and a pointer to a reference counter. 345An 346.Vt mbuf 347using external storage has the 348.Dv M_EXT 349flag set. 350.Pp 351The system supplies a macro for allocating the desired external storage 352buffer, 353.Dv MEXTADD . 354.Pp 355The allocation and management of the reference counter is handled by the 356subsystem. 357.Pp 358The system also supplies a default type of external storage buffer called an 359.Vt mbuf cluster . 360.Vt Mbuf clusters 361can be allocated and configured with the use of the 362.Dv MCLGET 363macro. 364Each 365.Vt mbuf cluster 366is 367.Dv MCLBYTES 368in size, where MCLBYTES is a machine-dependent constant. 
369The system defines an advisory macro 370.Dv MINCLSIZE , 371which is the smallest amount of data to put into an 372.Vt mbuf cluster . 373It is equal to 374.Dv MHLEN 375plus one. 376It is typically preferable to store data into the data region of an 377.Vt mbuf , 378if size permits, as opposed to allocating a separate 379.Vt mbuf cluster 380to hold the same data. 381.\" 382.Ss Macros and Functions 383There are numerous predefined macros and functions that provide the 384developer with common utilities. 385.\" 386.Bl -ohang -offset indent 387.It Fn mtod mbuf type 388Convert an 389.Fa mbuf 390pointer to a data pointer. 391The macro expands to the data pointer cast to the specified 392.Fa type . 393.Sy Note : 394It is advisable to ensure that there is enough contiguous data in 395.Fa mbuf . 396See 397.Fn m_pullup 398for details. 399.It Fn MGET mbuf how type 400Allocate an 401.Vt mbuf 402and initialize it to contain internal data. 403.Fa mbuf 404will point to the allocated 405.Vt mbuf 406on success, or be set to 407.Dv NULL 408on failure. 409The 410.Fa how 411argument is to be set to 412.Dv M_WAITOK 413or 414.Dv M_NOWAIT . 415It specifies whether the caller is willing to block if necessary. 416A number of other functions and macros related to 417.Vt mbufs 418have the same argument because they may 419at some point need to allocate new 420.Vt mbufs . 421.It Fn MGETHDR mbuf how type 422Allocate an 423.Vt mbuf 424and initialize it to contain a packet header 425and internal data. 426See 427.Fn MGET 428for details. 429.It Fn MEXTADD mbuf buf size free opt_arg1 opt_arg2 flags type 430Associate externally managed data with 431.Fa mbuf . 432Any internal data contained in the mbuf will be discarded, and the 433.Dv M_EXT 434flag will be set. 435The 436.Fa buf 437and 438.Fa size 439arguments are the address and length, respectively, of the data. 
440The 441.Fa free 442argument points to a function which will be called to free the data 443when the mbuf is freed; it is only used if 444.Fa type 445is 446.Dv EXT_EXTREF . 447The 448.Fa opt_arg1 449and 450.Fa opt_arg2 451arguments will be saved in the 452.Va ext_arg1 453and 454.Va ext_arg2 455fields of the 456.Vt struct m_ext 457of the mbuf. 458The 459.Fa flags 460argument specifies additional 461.Vt mbuf 462flags; it is not necessary to specify 463.Dv M_EXT . 464Finally, the 465.Fa type 466argument specifies the type of external data, which controls how it 467will be disposed of when the 468.Vt mbuf 469is freed. 470In most cases, the correct value is 471.Dv EXT_EXTREF . 472.It Fn MCLGET mbuf how 473Allocate and attach an 474.Vt mbuf cluster 475to 476.Fa mbuf . 477On success, a non-zero value is returned; otherwise, 0. 478Historically, consumers would check for success by testing the 479.Dv M_EXT 480flag on the mbuf, but this is now discouraged to avoid unnecessary awareness 481of the implementation of external storage in protocol stacks and device 482drivers. 483.It Fn M_ALIGN mbuf len 484Set the pointer 485.Fa mbuf->m_data 486to place an object of the size 487.Fa len 488at the end of the internal data area of 489.Fa mbuf , 490long word aligned. 491Applicable only if 492.Fa mbuf 493is newly allocated with 494.Fn MGET 495or 496.Fn m_get . 497.It Fn MH_ALIGN mbuf len 498Serves the same purpose as 499.Fn M_ALIGN 500does, but only for 501.Fa mbuf 502newly allocated with 503.Fn MGETHDR 504or 505.Fn m_gethdr , 506or initialized by 507.Fn m_dup_pkthdr 508or 509.Fn m_move_pkthdr . 510.It Fn m_align mbuf len 511Serves the same purpose as 512.Fn M_ALIGN 513but handles any type of mbuf. 514.It Fn M_LEADINGSPACE mbuf 515Returns the number of bytes available before the beginning 516of data in 517.Fa mbuf . 518.It Fn M_TRAILINGSPACE mbuf 519Returns the number of bytes available after the end of data in 520.Fa mbuf . 
521.It Fn M_PREPEND mbuf len how 522This macro operates on an 523.Vt mbuf chain . 524It is an optimized wrapper for 525.Fn m_prepend 526that can make use of possible empty space before data 527(e.g.\& left after trimming of a link-layer header). 528The new 529.Vt mbuf chain 530pointer or 531.Dv NULL 532is in 533.Fa mbuf 534after the call. 535.It Fn M_MOVE_PKTHDR to from 536Using this macro is equivalent to calling 537.Fn m_move_pkthdr to from . 538.It Fn M_WRITABLE mbuf 539This macro will evaluate to true if 540.Fa mbuf 541is not marked 542.Dv M_RDONLY 543and if either 544.Fa mbuf 545does not contain external storage or, 546if it does, 547the reference count of the storage is not greater than 1. 548The 549.Dv M_RDONLY 550flag can be set in 551.Fa mbuf->m_flags . 552This can be achieved during setup of the external storage, 553by passing the 554.Dv M_RDONLY 555bit as a 556.Fa flags 557argument to the 558.Fn MEXTADD 559macro, or can be directly set in individual 560.Vt mbufs . 561.It Fn MCHTYPE mbuf type 562Change the type of 563.Fa mbuf 564to 565.Fa type . 566This is a relatively expensive operation and should be avoided. 567.El 568.Pp 569The functions are: 570.Bl -ohang -offset indent 571.It Fn m_get how type 572A function version of 573.Fn MGET 574for non-critical paths. 575.It Fn m_get2 size how type flags 576Allocate an 577.Vt mbuf 578with enough space to hold the specified amount of data. 579If the size is larger than 580.Dv MJUMPAGESIZE , NULL 581will be returned. 582.It Fn m_get3 size how type flags 583Allocate an 584.Vt mbuf 585with enough space to hold the specified amount of data. 586If the size is larger than 587.Dv MJUM16BYTES , NULL 588will be returned. 589.It Fn m_getm orig len how type 590Allocate 591.Fa len 592bytes worth of 593.Vt mbufs 594and 595.Vt mbuf clusters 596if necessary and append the resulting allocated 597.Vt mbuf chain 598to the 599.Vt mbuf chain 600.Fa orig , 601if it is 602.No non- Ns Dv NULL . 
603If the allocation fails at any point, 604free whatever was allocated and return 605.Dv NULL . 606If 607.Fa orig 608is 609.No non- Ns Dv NULL , 610it will not be freed. 611It is possible to use 612.Fn m_getm 613to either append 614.Fa len 615bytes to an existing 616.Vt mbuf 617or 618.Vt mbuf chain 619(for example, one which may be sitting in a pre-allocated ring) 620or to simply perform an all-or-nothing 621.Vt mbuf 622and 623.Vt mbuf cluster 624allocation. 625.It Fn m_gethdr how type 626A function version of 627.Fn MGETHDR 628for non-critical paths. 629.It Fn m_getcl how type flags 630Fetch an 631.Vt mbuf 632with a 633.Vt mbuf cluster 634attached to it. 635If one of the allocations fails, the entire allocation fails. 636This routine is the preferred way of fetching both the 637.Vt mbuf 638and 639.Vt mbuf cluster 640together, as it avoids having to unlock/relock between allocations. 641Returns 642.Dv NULL 643on failure. 644.It Fn m_getjcl how type flags size 645This is like 646.Fn m_getcl 647but the specified 648.Fa size 649of the cluster to be allocated must be one of 650.Dv MCLBYTES , MJUMPAGESIZE , MJUM9BYTES , 651or 652.Dv MJUM16BYTES . 653.It Fn m_free mbuf 654Frees 655.Fa mbuf . 656Returns 657.Va m_next 658of the freed 659.Vt mbuf . 660.El 661.Pp 662The functions below operate on 663.Vt mbuf chains . 664.Bl -ohang -offset indent 665.It Fn m_freem mbuf 666Free an entire 667.Vt mbuf chain , 668including any external storage. 669.\" 670.It Fn m_adj mbuf len 671Trim 672.Fa len 673bytes from the head of an 674.Vt mbuf chain 675if 676.Fa len 677is positive, from the tail otherwise. 678.\" 679.It Fn m_append mbuf len cp 680Append 681.Fa len 682bytes of data 683.Fa cp 684to the 685.Vt mbuf chain . 686Extend the mbuf chain if the new data does not fit in 687existing space. 688.\" 689.It Fn m_prepend mbuf len how 690Allocate a new 691.Vt mbuf 692and prepend it to the 693.Vt mbuf chain , 694handling 695.Dv M_PKTHDR 696properly. 
697.Sy Note : 698It does not allocate any 699.Vt mbuf clusters , 700so 701.Fa len 702must be less than 703.Dv MLEN 704or 705.Dv MHLEN , 706depending on the 707.Dv M_PKTHDR 708flag setting. 709.\" 710.It Fn m_copyup mbuf len dstoff 711Similar to 712.Fn m_pullup 713but copies 714.Fa len 715bytes of data into a new mbuf at 716.Fa dstoff 717bytes into the mbuf. 718The 719.Fa dstoff 720argument aligns the data and leaves room for a link-layer header. 721Returns the new 722.Vt mbuf chain 723on success, 724and frees the 725.Vt mbuf chain 726and returns 727.Dv NULL 728on failure. 729.Sy Note : 730The function does not allocate 731.Vt mbuf clusters , 732so 733.Fa len + dstoff 734must be less than 735.Dv MHLEN . 736.\" 737.It Fn m_pullup mbuf len 738Arrange that the first 739.Fa len 740bytes of an 741.Vt mbuf chain 742are contiguous and lie in the data area of 743.Fa mbuf , 744so they are accessible with 745.Fn mtod mbuf type . 746It is important to remember that this may involve 747reallocating some mbufs and moving data, so all pointers 748referencing data within the old mbuf chain 749must be recalculated or treated as invalid. 750Return the new 751.Vt mbuf chain 752on success, 753.Dv NULL 754on failure 755(the 756.Vt mbuf chain 757is freed in this case). 758.Sy Note : 759It does not allocate any 760.Vt mbuf clusters , 761so 762.Fa len 763must be less than or equal to 764.Dv MHLEN . 765.\" 766.It Fn m_pulldown mbuf offset len offsetp 767Arrange that 768.Fa len 769bytes between 770.Fa offset 771and 772.Fa offset + len 773in the 774.Vt mbuf chain 775are contiguous and lie in the data area of 776.Fa mbuf , 777so they are accessible with 778.Fn mtod mbuf type . 779.Fa len 780must be smaller than, or equal to, the size of an 781.Vt mbuf cluster . 782Return a pointer to an intermediate 783.Vt mbuf 784in the chain containing the requested region; 785the offset in the data region of the 786.Vt mbuf chain 787to the data contained in the returned mbuf is stored in 788.Fa *offsetp . 
789If 790.Fa offsetp 791is NULL, the region may be accessed using 792.Fn mtod mbuf type . 793If 794.Fa offsetp 795is non-NULL, the region may be accessed using 796.Fn mtod mbuf uint8_t 797+ *offsetp. 798The region of the mbuf chain between its beginning and 799.Fa offset 800is not modified; therefore it is safe to hold pointers to data within 801this region before calling 802.Fn m_pulldown . 803.\" 804.It Fn m_copym mbuf offset len how 805Make a copy of an 806.Vt mbuf chain 807starting 808.Fa offset 809bytes from the beginning, continuing for 810.Fa len 811bytes. 812If 813.Fa len 814is 815.Dv M_COPYALL , 816copy to the end of the 817.Vt mbuf chain . 818.Sy Note : 819The copy is read-only, because the 820.Vt mbuf clusters 821are not copied, only their reference counts are incremented. 822.\" 823.It Fn m_copypacket mbuf how 824Copy an entire packet including header, which must be present. 825This is an optimized version of the common case 826.Fn m_copym mbuf 0 M_COPYALL how . 827.Sy Note : 828The copy is read-only, because the 829.Vt mbuf clusters 830are not copied, only their reference counts are incremented. 831.\" 832.It Fn m_dup mbuf how 833Copy a packet header 834.Vt mbuf chain 835into a completely new 836.Vt mbuf chain , 837including copying any 838.Vt mbuf clusters . 839Use this instead of 840.Fn m_copypacket 841when you need a writable copy of an 842.Vt mbuf chain . 843.\" 844.It Fn m_copydata mbuf offset len buf 845Copy data from an 846.Vt mbuf chain 847starting 848.Fa offset 849bytes from the beginning, continuing for 850.Fa len 851bytes, into the indicated buffer 852.Fa buf . 853.\" 854.It Fn m_copyback mbuf offset len buf 855Copy 856.Fa len 857bytes from the buffer 858.Fa buf 859back into the indicated 860.Vt mbuf chain , 861starting at 862.Fa offset 863bytes from the beginning of the 864.Vt mbuf chain , 865extending the 866.Vt mbuf chain 867if necessary. 
868.Sy Note : 869It does not allocate any 870.Vt mbuf clusters , 871just adds 872.Vt mbufs 873to the 874.Vt mbuf chain . 875It is safe to set 876.Fa offset 877beyond the current 878.Vt mbuf chain 879end: zeroed 880.Vt mbufs 881will be allocated to fill the space. 882.\" 883.It Fn m_length mbuf last 884Return the length of the 885.Vt mbuf chain , 886and optionally a pointer to the last 887.Vt mbuf . 888.\" 889.It Fn m_dup_pkthdr to from how 890Upon the function's completion, the 891.Vt mbuf 892.Fa to 893will contain an identical copy of 894.Fa from->m_pkthdr 895and the per-packet attributes found in the 896.Vt mbuf chain 897.Fa from . 898The 899.Vt mbuf 900.Fa from 901must have the flag 902.Dv M_PKTHDR 903initially set, and 904.Fa to 905must be empty on entry. 906.\" 907.It Fn m_move_pkthdr to from 908Move 909.Va m_pkthdr 910and the per-packet attributes from the 911.Vt mbuf chain 912.Fa from 913to the 914.Vt mbuf 915.Fa to . 916The 917.Vt mbuf 918.Fa from 919must have the flag 920.Dv M_PKTHDR 921initially set, and 922.Fa to 923must be empty on entry. 924Upon the function's completion, 925.Fa from 926will have the flag 927.Dv M_PKTHDR 928and the per-packet attributes cleared. 929.\" 930.It Fn m_fixhdr mbuf 931Set the packet-header length to the length of the 932.Vt mbuf chain . 933.\" 934.It Fn m_devget buf len offset ifp copy 935Copy data from a device local memory pointed to by 936.Fa buf 937to an 938.Vt mbuf chain . 939The copy is done using a specified copy routine 940.Fa copy , 941or 942.Fn bcopy 943if 944.Fa copy 945is 946.Dv NULL . 947.\" 948.It Fn m_cat m n 949Concatenate 950.Fa n 951to 952.Fa m . 953Both 954.Vt mbuf chains 955must be of the same type. 956.Fa n 957is not guaranteed to be valid after 958.Fn m_cat 959returns. 960.Fn m_cat 961does not update any packet header fields or free mbuf tags. 962.\" 963.It Fn m_catpkt m n 964A variant of 965.Fn m_cat 966that operates on packets. 967Both 968.Fa m 969and 970.Fa n 971must contain packet headers. 
972.Fa n 973is not guaranteed to be valid after 974.Fn m_catpkt 975returns. 976.\" 977.It Fn m_split mbuf len how 978Partition an 979.Vt mbuf chain 980in two pieces, returning the tail: 981all but the first 982.Fa len 983bytes. 984In case of failure, it returns 985.Dv NULL 986and attempts to restore the 987.Vt mbuf chain 988to its original state. 989.\" 990.It Fn m_apply mbuf off len f arg 991Apply a function to an 992.Vt mbuf chain , 993at offset 994.Fa off , 995for length 996.Fa len 997bytes. 998Typically used to avoid calls to 999.Fn m_pullup 1000which would otherwise be unnecessary or undesirable. 1001.Fa arg 1002is a convenience argument which is passed to the callback function 1003.Fa f . 1004.Pp 1005Each time 1006.Fn f 1007is called, it will be passed 1008.Fa arg , 1009a pointer to the 1010.Fa data 1011in the current mbuf, and the length 1012.Fa len 1013of the data in this mbuf to which the function should be applied. 1014.Pp 1015The function should return zero to indicate success; 1016otherwise, if an error is indicated, then 1017.Fn m_apply 1018will return the error and stop iterating through the 1019.Vt mbuf chain . 1020.\" 1021.It Fn m_getptr mbuf loc off 1022Return a pointer to the mbuf containing the data located at 1023.Fa loc 1024bytes from the beginning of the 1025.Vt mbuf chain . 1026The corresponding offset into the mbuf will be stored in 1027.Fa *off . 1028.It Fn m_defrag m0 how 1029Defragment an mbuf chain, returning the shortest possible 1030chain of mbufs and clusters. 1031If allocation fails and this can not be completed, 1032.Dv NULL 1033will be returned and the original chain will be unchanged. 1034Upon success, the original chain will be freed and the new 1035chain will be returned. 1036.Fa how 1037should be either 1038.Dv M_WAITOK 1039or 1040.Dv M_NOWAIT , 1041depending on the caller's preference. 
1042.Pp 1043This function is especially useful in network drivers, where 1044certain long mbuf chains must be shortened before being added 1045to TX descriptor lists. 1046.It Fn m_collapse m0 how maxfrags 1047Defragment an mbuf chain, returning a chain of at most 1048.Fa maxfrags 1049mbufs and clusters. 1050If allocation fails or the chain cannot be collapsed as requested, 1051.Dv NULL 1052will be returned, with the original chain possibly modified. 1053As with 1054.Fn m_defrag , 1055.Fa how 1056should be one of 1057.Dv M_WAITOK 1058or 1059.Dv M_NOWAIT . 1060.It Fn m_unshare m0 how 1061Create a version of the specified mbuf chain whose 1062contents can be safely modified without affecting other users. 1063If allocation fails and this operation can not be completed, 1064.Dv NULL 1065will be returned. 1066The original mbuf chain is always reclaimed and the reference 1067count of any shared mbuf clusters is decremented. 1068.Fa how 1069should be either 1070.Dv M_WAITOK 1071or 1072.Dv M_NOWAIT , 1073depending on the caller's preference. 1074As a side-effect of this process the returned 1075mbuf chain may be compacted. 1076.Pp 1077This function is especially useful in the transmit path of 1078network code, when data must be encrypted or otherwise 1079altered prior to transmission. 1080.El 1081.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 1082This section currently applies to TCP/IP only. 1083In order to save the host CPU resources, computing checksums is 1084offloaded to the network interface hardware if possible. 1085The 1086.Va m_pkthdr 1087member of the leading 1088.Vt mbuf 1089of a packet contains two fields used for that purpose, 1090.Vt int Va csum_flags 1091and 1092.Vt int Va csum_data . 1093The meaning of those fields depends on the direction a packet flows in, 1094and on whether the packet is fragmented. 
1095Henceforth, 1096.Va csum_flags 1097or 1098.Va csum_data 1099of a packet 1100will denote the corresponding field of the 1101.Va m_pkthdr 1102member of the leading 1103.Vt mbuf 1104in the 1105.Vt mbuf chain 1106containing the packet. 1107.Pp 1108On output, checksum offloading is attempted after the outgoing 1109interface has been determined for a packet. 1110The interface-specific field 1111.Va ifnet.if_data.ifi_hwassist 1112(see 1113.Xr ifnet 9 ) 1114is consulted for the capabilities of the interface to assist in 1115computing checksums. 1116The 1117.Va csum_flags 1118field of the packet header is set to indicate which actions the interface 1119is supposed to perform on it. 1120The actions unsupported by the network interface are done in the 1121software prior to passing the packet down to the interface driver; 1122such actions will never be requested through 1123.Va csum_flags . 1124.Pp 1125The flags demanding a particular action from an interface are as follows: 1126.Bl -tag -width ".Dv CSUM_TCP" -offset indent 1127.It Dv CSUM_IP 1128The IP header checksum is to be computed and stored in the 1129corresponding field of the packet. 1130The hardware is expected to know the format of an IP header 1131to determine the offset of the IP checksum field. 1132.It Dv CSUM_TCP 1133The TCP checksum is to be computed. 1134(See below.) 1135.It Dv CSUM_UDP 1136The UDP checksum is to be computed. 1137(See below.) 1138.El 1139.Pp 1140Should a TCP or UDP checksum be offloaded to the hardware, 1141the field 1142.Va csum_data 1143will contain the byte offset of the checksum field relative to the 1144end of the IP header. 1145In this case, the checksum field will be initially 1146set by the TCP/IP module to the checksum of the pseudo header 1147defined by the TCP and UDP specifications. 
1148.Pp 1149On input, an interface indicates the actions it has performed 1150on a packet by setting one or more of the following flags in 1151.Va csum_flags 1152associated with the packet: 1153.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1154.It Dv CSUM_IP_CHECKED 1155The IP header checksum has been computed. 1156.It Dv CSUM_IP_VALID 1157The IP header has a valid checksum. 1158This flag can appear only in combination with 1159.Dv CSUM_IP_CHECKED . 1160.It Dv CSUM_DATA_VALID 1161The checksum of the data portion of the IP packet has been computed 1162and stored in the field 1163.Va csum_data 1164in network byte order. 1165.It Dv CSUM_PSEUDO_HDR 1166Can be set only along with 1167.Dv CSUM_DATA_VALID 1168to indicate that the IP data checksum found in 1169.Va csum_data 1170allows for the pseudo header defined by the TCP and UDP specifications. 1171Otherwise the checksum of the pseudo header must be calculated by 1172the host CPU and added to 1173.Va csum_data 1174to obtain the final checksum to be used for TCP or UDP validation purposes. 1175.El 1176.Pp 1177If a particular network interface just indicates success or 1178failure of TCP or UDP checksum validation without returning 1179the exact value of the checksum to the host CPU, its driver can mark 1180.Dv CSUM_DATA_VALID 1181and 1182.Dv CSUM_PSEUDO_HDR 1183in 1184.Va csum_flags , 1185and set 1186.Va csum_data 1187to 1188.Li 0xFFFF 1189hexadecimal to indicate a valid checksum. 1190It is a peculiarity of the algorithm used that the Internet checksum 1191calculated over any valid packet will be 1192.Li 0xFFFF 1193as long as the original checksum field is included. 1194.Sh STRESS TESTING 1195When running a kernel compiled with the option 1196.Dv MBUF_STRESS_TEST , 1197the following 1198.Xr sysctl 8 Ns 1199-controlled options may be used to create 1200various failure/extreme cases for testing of network drivers 1201and other parts of the kernel that rely on 1202.Vt mbufs . 
1203.Bl -tag -width ident 1204.It Va net.inet.ip.mbuf_frag_size 1205Causes 1206.Fn ip_output 1207to fragment outgoing 1208.Vt mbuf chains 1209into fragments of the specified size. 1210Setting this variable to 1 is an excellent way to 1211test the long 1212.Vt mbuf chain 1213handling ability of network drivers. 1214.It Va kern.ipc.m_defragrandomfailures 1215Causes the function 1216.Fn m_defrag 1217to randomly fail, returning 1218.Dv NULL . 1219Any piece of code which uses 1220.Fn m_defrag 1221should be tested with this feature. 1222.El 1223.Sh RETURN VALUES 1224See above. 1225.Sh SEE ALSO 1226.Xr ifnet 9 , 1227.Xr mbuf_tags 9 1228.Rs 1229.\" 4.4BSD SMM:18 1230.%A S. J. Leffler 1231.%A W. N. Joy 1232.%A R. S. Fabry 1233.%A M. J. Karels 1234.%T Networking Implementation Notes 1235.%B 4.4BSD System Manager's Manual (SMM) 1236.Re 1237.Sh HISTORY 1238.\" Please correct me if I'm wrong 1239.Vt Mbufs 1240appeared in an early version of 1241.Bx . 1242Besides being used for network packets, they were used 1243to store various dynamic structures, such as routing table 1244entries, interface addresses, protocol control blocks, etc. 1245In more recent 1246.Fx 1247use of 1248.Vt mbufs 1249is almost entirely limited to packet storage, with 1250.Xr uma 9 1251zones being used directly to store other network-related memory. 1252.Pp 1253Historically, the 1254.Vt mbuf 1255allocator has been a special-purpose memory allocator able to run in 1256interrupt contexts and allocating from a special kernel address space map. 1257As of 1258.Fx 5.3 , 1259the 1260.Vt mbuf 1261allocator is a wrapper around 1262.Xr uma 9 , 1263allowing caching of 1264.Vt mbufs , 1265clusters, and 1266.Vt mbuf 1267+ cluster pairs in per-CPU caches, as well as bringing other benefits of 1268slab allocation. 1269.Sh AUTHORS 1270The original 1271.Nm 1272manual page was written by 1273.An Yar Tikhiy . 1274The 1275.Xr uma 9 1276.Vt mbuf 1277allocator was written by 1278.An Bosko Milekic . 1279