1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd October 17, 2000 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Ft void * 60.Fn mtod "struct mbuf *mbuf" "type" 61.Ft int 62.Fn MEXT_IS_REF "struct mbuf *mbuf" 63.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 64.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 65.Ft int 66.Fn M_LEADINGSPACE "struct mbuf *mbuf" 67.Ft int 68.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 69.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 70.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 71.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 72.Ft int 73.Fn M_WRITABLE "struct mbuf *mbuf" 74.\" 75.Ss Mbuf allocation functions 76.Ft struct mbuf * 77.Fn m_get "int how" "int type" 78.Ft struct mbuf * 79.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 80.Ft struct mbuf * 81.Fn m_getclr "int how" "int type" 82.Ft struct mbuf * 83.Fn m_gethdr "int how" "int type" 84.Ft struct mbuf * 85.Fn m_free "struct mbuf *mbuf" 86.Ft void 87.Fn m_freem "struct mbuf *mbuf" 88.\" 89.Ss Mbuf utility functions 90.Ft void 91.Fn m_adj "struct mbuf *mbuf" "int len" 92.Ft struct mbuf * 93.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 94.Ft struct mbuf * 95.Fn m_pullup "struct mbuf *mbuf" "int len" 96.Ft struct mbuf * 97.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int 
how" 98.Ft struct mbuf * 99.Fn m_copypacket "struct mbuf *mbuf" "int how" 100.Ft struct mbuf * 101.Fn m_dup "struct mbuf *mbuf" "int how" 102.Ft void 103.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 104.Ft void 105.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 106.Ft struct mbuf * 107.Fo m_devget 108.Fa "char *buf" 109.Fa "int len" 110.Fa "int offset" 111.Fa "struct ifnet *ifp" 112.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 113.Fc 114.Ft void 115.Fn m_cat "struct mbuf *m" "struct mbuf *n" 116.Ft u_int 117.Fn m_fixhdr "struct mbuf *mbuf" 118.Ft void 119.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" 120.Ft void 121.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 122.Ft u_int 123.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 124.Ft struct mbuf * 125.Fn m_split "struct mbuf *mbuf" "int len" "int how" 126.\" 127.Sh DESCRIPTION 128An 129.Vt mbuf 130is a basic unit of memory management in the kernel IPC subsystem. 131Network packets and socket buffers are stored in 132.Vt mbufs . 133A network packet may span multiple 134.Vt mbufs 135arranged into a 136.Vt mbuf chain 137(linked list), 138which allows adding or trimming 139network headers with little overhead. 140.Pp 141While a developer should not bother with 142.Vt mbuf 143internals without serious 144reason in order to avoid incompatibilities with future changes, it 145is useful to understand the general structure of an 146.Vt mbuf . 147.Pp 148An 149.Vt mbuf 150consists of a variable-sized header and a small internal 151buffer for data. 152The total size of an 153.Vt mbuf , 154.Dv MSIZE , 155is a machine-dependent constant defined in 156.In machine/param.h . 157The 158.Vt mbuf 159header includes: 160.Pp 161.Bl -tag -width "m_nextpkt" -offset indent 162.It Va m_next 163.Pq Vt struct mbuf * 164A pointer to the next 165.Vt mbuf 166in the 167.Vt mbuf chain . 
168.It Va m_nextpkt 169.Pq Vt struct mbuf * 170A pointer to the next 171.Vt mbuf chain 172in the queue. 173.It Va m_data 174.Pq Vt caddr_t 175A pointer to data attached to this 176.Vt mbuf . 177.It Va m_len 178.Pq Vt int 179The length of the data. 180.It Va m_type 181.Pq Vt short 182The type of the data. 183.It Va m_flags 184.Pq Vt int 185The 186.Vt mbuf 187flags. 188.El 189.Pp 190The 191.Vt mbuf 192flag bits are defined as follows: 193.Bd -literal 194/* mbuf flags */ 195#define M_EXT 0x0001 /* has associated external storage */ 196#define M_PKTHDR 0x0002 /* start of record */ 197#define M_EOR 0x0004 /* end of record */ 198#define M_RDONLY 0x0008 /* associated data marked read-only */ 199#define M_PROTO1 0x0010 /* protocol-specific */ 200#define M_PROTO2 0x0020 /* protocol-specific */ 201#define M_PROTO3 0x0040 /* protocol-specific */ 202#define M_PROTO4 0x0080 /* protocol-specific */ 203#define M_PROTO5 0x0100 /* protocol-specific */ 204#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 205#define M_FREELIST 0x8000 /* mbuf is on the free list */ 206 207/* mbuf pkthdr flags (also stored in m_flags) */ 208#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 209#define M_MCAST 0x0400 /* send/received as link-level multicast */ 210#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 211#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 212#define M_LASTFRAG 0x2000 /* packet is last fragment */ 213.Ed 214.Pp 215The available 216.Vt mbuf 217types are defined as follows: 218.Bd -literal 219/* mbuf types */ 220#define MT_DATA 1 /* dynamic (data) allocation */ 221#define MT_HEADER 2 /* packet header */ 222#define MT_SONAME 8 /* socket name */ 223#define MT_FTABLE 11 /* fragment reassembly header */ 224#define MT_TAG 13 /* volatile metadata associated to pkts */ 225#define MT_CONTROL 14 /* extra-data protocol message */ 226#define MT_OOBDATA 15 /* expedited data */ 227.Ed 228.Pp 229If the 230.Dv M_PKTHDR 231flag is set, 
a 232.Vt struct pkthdr Va m_pkthdr 233is added to the 234.Vt mbuf 235header. 236It contains a pointer to the interface 237the packet has been received from 238.Pq Vt struct ifnet Va *rcvif , 239and the total packet length 240.Pq Vt int Va len . 241.Pp 242If small enough, data is stored in the internal data buffer of an 243.Vt mbuf . 244If the data is sufficiently large, another 245.Vt mbuf 246may be added to the 247.Vt mbuf chain , 248or external storage may be associated with the 249.Vt mbuf . 250.Dv MHLEN 251bytes of data can fit into an 252.Vt mbuf 253with the 254.Dv M_PKTHDR 255flag set, 256.Dv MLEN 257bytes can otherwise. 258.Pp 259If external storage is being associated with an 260.Vt mbuf , 261the 262.Va m_ext 263header is added at the cost of losing the internal data buffer. 264It includes a pointer to external storage, the size of the storage, 265a pointer to a function used for freeing the storage, 266a pointer to an optional argument that can be passed to the function, 267and a pointer to a reference counter. 268An 269.Vt mbuf 270using external storage has the 271.Dv M_EXT 272flag set. 273.Pp 274The system supplies a macro for allocating the desired external storage 275buffer, 276.Dv MEXTADD . 277.Pp 278The allocation and management of the reference counter is handled by the 279subsystem. 280The developer can check whether the reference count for the 281external storage of a given 282.Vt mbuf 283is greater than 1 with the 284.Dv MEXT_IS_REF 285macro. 286Similarly, the developer can directly add and remove references, 287if absolutely necessary, with the use of the 288.Dv MEXT_ADD_REF 289and 290.Dv MEXT_REM_REF 291macros. 292.Pp 293The system also supplies a default type of external storage buffer called an 294.Vt mbuf cluster . 295.Vt Mbuf clusters 296can be allocated and configured with the use of the 297.Dv MCLGET 298macro. 299Each 300.Vt mbuf cluster 301is 302.Dv MCLBYTES 303in size, where MCLBYTES is a machine-dependent constant. 
304The system defines an advisory macro 305.Dv MINCLSIZE , 306which is the smallest amount of data to put into an 307.Vt mbuf cluster . 308It's equal to the sum of 309.Dv MLEN 310and 311.Dv MHLEN . 312It is typically preferable to store data into the data region of an 313.Vt mbuf , 314if size permits, as opposed to allocating a separate 315.Vt mbuf cluster 316to hold the same data. 317.\" 318.Ss Macros and Functions 319There are numerous predefined macros and functions that provide the 320developer with common utilities. 321.\" 322.Bl -ohang -offset indent 323.It Fn mtod mbuf type 324Convert an 325.Fa mbuf 326pointer to a data pointer. 327The macro expands to the data pointer cast to the pointer of the specified 328.Fa type . 329.Sy Note : 330It is advisable to ensure that there is enough contiguous data in 331.Fa mbuf . 332See 333.Fn m_pullup 334for details. 335.It Fn MGET mbuf how type 336Allocate an 337.Vt mbuf 338and initialize it to contain internal data. 339.Fa mbuf 340will point to the allocated 341.Vt mbuf 342on success, or be set to 343.Dv NULL 344on failure. 345The 346.Fa how 347argument is to be set to 348.Dv M_TRYWAIT 349or 350.Dv M_DONTWAIT . 351It specifies whether the caller is willing to block if necessary. 352If 353.Fa how 354is set to 355.Dv M_TRYWAIT , 356a failed allocation will result in the caller being put 357to sleep for a designated 358.Va kern.ipc.mbuf_wait 359.Pq Xr sysctl 8 No tunable 360number of 361ticks. 362A number of other functions and macros related to 363.Vt mbufs 364have the same argument because they may 365at some point need to allocate new 366.Vt mbufs . 367.Pp 368Programmers should be careful not to confuse the 369.Vt mbuf 370allocation flag 371.Dv M_DONTWAIT 372with the 373.Xr malloc 9 374allocation flag, 375.Dv M_NOWAIT . 376They are not the same. 377.It Fn MGETHDR mbuf how type 378Allocate an 379.Vt mbuf 380and initialize it to contain a packet header 381and internal data. 382See 383.Fn MGET 384for details. 
385.It Fn MCLGET mbuf how 386Allocate and attach an 387.Vt mbuf cluster 388to 389.Fa mbuf . 390If the macro fails, the 391.Dv M_EXT 392flag won't be set in 393.Fa mbuf . 394.It Fn M_ALIGN mbuf len 395Set the pointer 396.Fa mbuf->m_data 397to place an object of the size 398.Fa len 399at the end of the internal data area of 400.Fa mbuf , 401long word aligned. 402Applicable only if 403.Fa mbuf 404is newly allocated with 405.Fn MGET 406or 407.Fn m_get . 408.It Fn MH_ALIGN mbuf len 409Serves the same purpose as 410.Fn M_ALIGN 411does, but only for 412.Fa mbuf 413newly allocated with 414.Fn MGETHDR 415or 416.Fn m_gethdr , 417or initialized by 418.Fn m_dup_pkthdr 419or 420.Fn m_move_pkthdr . 421.It Fn M_LEADINGSPACE mbuf 422Returns the number of bytes available before the beginning 423of data in 424.Fa mbuf . 425.It Fn M_TRAILINGSPACE mbuf 426Returns the number of bytes available after the end of data in 427.Fa mbuf . 428.It Fn M_PREPEND mbuf len how 429This macro operates on an 430.Vt mbuf chain . 431It is an optimized wrapper for 432.Fn m_prepend 433that can make use of possible empty space before data 434(e.g. left after trimming of a link-layer header). 435The new 436.Vt mbuf chain 437pointer or 438.Dv NULL 439is in 440.Fa mbuf 441after the call. 442.It Fn M_MOVE_PKTHDR to from 443Using this macro is equivalent to calling 444.Fn m_move_pkthdr to from . 445.It Fn M_WRITABLE mbuf 446This macro will evaluate true if 447.Fa mbuf 448is not marked 449.Dv M_RDONLY 450and if either 451.Fa mbuf 452does not contain external storage or, 453if it does, 454then if the reference count of the storage is not greater than 1. 455The 456.Dv M_RDONLY 457flag can be set in 458.Fa mbuf->m_flags . 459This can be achieved during setup of the external storage, 460by passing the 461.Dv M_RDONLY 462bit as a 463.Fa flags 464argument to the 465.Fn MEXTADD 466macro, or can be directly set in individual 467.Vt mbufs . 468.It Fn MCHTYPE mbuf type 469Change the type of 470.Fa mbuf 471to 472.Fa type . 
473This is a relatively expensive operation and should be avoided. 474.El 475.Pp 476The functions are: 477.Bl -ohang -offset indent 478.It Fn m_get how type 479A function version of 480.Fn MGET 481for non-critical paths. 482.It Fn m_getm orig len how type 483Allocate 484.Fa len 485bytes worth of 486.Vt mbufs 487and 488.Vt mbuf clusters 489if necessary and append the resulting allocated 490.Vt mbuf chain 491to the 492.Vt mbuf chain 493.Fa orig , 494if it is 495.No non- Ns Dv NULL . 496If the allocation fails at any point, 497free whatever was allocated and return 498.Dv NULL . 499If 500.Fa orig 501is 502.No non- Ns Dv NULL , 503it will not be freed. 504It is possible to use 505.Fn m_getm 506to either append 507.Fa len 508bytes to an existing 509.Vt mbuf 510or 511.Vt mbuf chain 512(for example, one which may be sitting in a pre-allocated ring) 513or to simply perform an all-or-nothing 514.Vt mbuf 515and 516.Vt mbuf cluster 517allocation. 518.It Fn m_gethdr how type 519A function version of 520.Fn MGETHDR 521for non-critical paths. 522.It Fn m_getclr how type 523Allocate an 524.Vt mbuf 525and zero out the data region. 526.El 527.Pp 528The functions below operate on 529.Vt mbuf chains . 530.Bl -ohang -offset indent 531.It Fn m_freem mbuf 532Free an entire 533.Vt mbuf chain , 534including any external storage. 535.\" 536.It Fn m_adj mbuf len 537Trim 538.Fa len 539bytes from the head of an 540.Vt mbuf chain 541if 542.Fa len 543is positive, from the tail otherwise. 544.\" 545.It Fn m_prepend mbuf len how 546Allocate a new 547.Vt mbuf 548and prepend it to the 549.Vt mbuf chain , 550handling 551.Dv M_PKTHDR 552properly. 553.Sy Note : 554It doesn't allocate any 555.Vt mbuf clusters , 556so 557.Fa len 558must be less than 559.Dv MLEN 560or 561.Dv MHLEN , 562depending on the 563.Dv M_PKTHDR 564flag setting. 
565.\" 566.It Fn m_pullup mbuf len 567Arrange that the first 568.Fa len 569bytes of an 570.Vt mbuf chain 571are contiguous and lie in the data area of 572.Fa mbuf , 573so they are accessible with 574.Fn mtod mbuf type . 575Return the new 576.Vt mbuf chain 577on success, 578.Dv NULL 579on failure 580(the 581.Vt mbuf chain 582is freed in this case). 583.Sy Note : 584It doesn't allocate any 585.Vt mbuf clusters , 586so 587.Fa len 588must be less than 589.Dv MHLEN . 590.\" 591.It Fn m_copym mbuf offset len how 592Make a copy of an 593.Vt mbuf chain 594starting 595.Fa offset 596bytes from the beginning, continuing for 597.Fa len 598bytes. 599If 600.Fa len 601is 602.Dv M_COPYALL , 603copy to the end of the 604.Vt mbuf chain . 605.Sy Note : 606The copy is read-only, because the 607.Vt mbuf clusters 608are not copied, only their reference counts are incremented. 609.\" 610.It Fn m_copypacket mbuf how 611Copy an entire packet including header, which must be present. 612This is an optimized version of the common case 613.Fn m_copym mbuf 0 M_COPYALL how . 614.Sy Note : 615The copy is read-only, because the 616.Vt mbuf clusters 617are not copied, only their reference counts are incremented. 618.\" 619.It Fn m_dup mbuf how 620Copy a packet header 621.Vt mbuf chain 622into a completely new 623.Vt mbuf chain , 624including copying any 625.Vt mbuf clusters . 626Use this instead of 627.Fn m_copypacket 628when you need a writable copy of an 629.Vt mbuf chain . 630.\" 631.It Fn m_copydata mbuf offset len buf 632Copy data from an 633.Vt mbuf chain 634starting 635.Fa offset 636bytes from the beginning, continuing for 637.Fa len 638bytes, into the indicated buffer 639.Fa buf . 640.\" 641.It Fn m_copyback mbuf offset len buf 642Copy 643.Fa len 644bytes from the buffer 645.Fa buf 646back into the indicated 647.Vt mbuf chain , 648starting at 649.Fa offset 650bytes from the beginning of the 651.Vt mbuf chain , 652extending the 653.Vt mbuf chain 654if necessary. 
655.Sy Note : 656It doesn't allocate any 657.Vt mbuf clusters , 658just adds 659.Vt mbufs 660to the 661.Vt mbuf chain . 662It's safe to set 663.Fa offset 664beyond the current 665.Vt mbuf chain 666end: zeroed 667.Vt mbufs 668will be allocated to fill the space. 669.\" 670.It Fn m_length mbuf last 671Return the length of the 672.Vt mbuf chain , 673and optionally a pointer to the last 674.Vt mbuf . 675.\" 676.It Fn m_dup_pkthdr to from how 677Upon the function's completion, the 678.Vt mbuf 679.Fa to 680will contain an identical copy of 681.Fa from->m_pkthdr 682and the per-packet attributes found in the 683.Vt mbuf chain 684.Fa from . 685The 686.Vt mbuf 687.Fa from 688must have the flag 689.Dv M_PKTHDR 690initially set, and 691.Fa to 692must be empty on entry. 693.\" 694.It Fn m_move_pkthdr to from 695Move 696.Va m_pkthdr 697and the per-packet attributes from the 698.Vt mbuf chain 699.Fa from 700to the 701.Vt mbuf 702.Fa to . 703The 704.Vt mbuf 705.Fa from 706must have the flag 707.Dv M_PKTHDR 708initially set, and 709.Fa to 710must be empty on entry. 711Upon the function's completion, 712.Fa from 713will have the flag 714.Dv M_PKTHDR 715and the per-packet attributes cleared. 716.\" 717.It Fn m_fixhdr mbuf 718Set the packet-header length to the length of the 719.Vt mbuf chain . 720.\" 721.It Fn m_devget buf len offset ifp copy 722Copy data from device-local memory pointed to by 723.Fa buf 724to an 725.Vt mbuf chain . 726The copy is done using a specified copy routine 727.Fa copy , 728or 729.Fn bcopy 730if 731.Fa copy 732is 733.Dv NULL . 734.\" 735.It Fn m_cat m n 736Concatenate 737.Fa n 738to 739.Fa m . 740Both 741.Vt mbuf chains 742must be of the same type. 743.Fa n 744is still valid after the function has returned. 745.Sy Note : 746It does not handle 747.Dv M_PKTHDR 748and friends. 749.\" 750.It Fn m_split mbuf len how 751Partition an 752.Vt mbuf chain 753in two pieces, returning the tail: 754all but the first 755.Fa len 756bytes. 
757In case of failure, it returns 758.Dv NULL 759and attempts to restore the 760.Vt mbuf chain 761to its original state. 762.El 763.Sh STRESS TESTING 764When running a kernel compiled with the option 765.Dv MBUF_STRESS_TEST , 766the following 767.Xr sysctl 8 Ns 768-controlled options may be used to create 769various failure/extreme cases for testing of network drivers 770and other parts of the kernel that rely on 771.Vt mbufs . 772.Bl -tag -width indent 773.It Va net.inet.ip.mbuf_frag_size 774Causes 775.Fn ip_output 776to fragment outgoing 777.Vt mbuf chains 778into fragments of the specified size. 779Setting this variable to 1 is an excellent way to 780test the long 781.Vt mbuf chain 782handling ability of network drivers. 783.It Va kern.ipc.m_defragrandomfailures 784Causes the function 785.Fn m_defrag 786to randomly fail, returning 787.Dv NULL . 788Any piece of code which uses 789.Fn m_defrag 790should be tested with this feature. 791.El 792.Sh RETURN VALUES 793See above. 794.Sh HISTORY 795.\" Please correct me if I'm wrong 796.Vt Mbufs 797appeared in an early version of 798.Bx . 799Besides being used for network packets, they were used 800to store various dynamic structures, such as routing table 801entries, interface addresses, protocol control blocks, etc. 802.Sh AUTHORS 803The original 804.Nm 805man page was written by Yar Tikhiy. 806