1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd October 17, 2000 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Ft void * 60.Fn mtod "struct mbuf *mbuf" "type" 61.Ft int 62.Fn MEXT_IS_REF "struct mbuf *mbuf" 63.Fn M_COPY_PKTHDR "struct mbuf *to" "struct mbuf *from" 64.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 65.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 66.Ft int 67.Fn M_LEADINGSPACE "struct mbuf *mbuf" 68.Ft int 69.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 70.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 71.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 72.Ft int 73.Fn M_WRITABLE "struct mbuf *mbuf" 74.\" 75.Ss Mbuf allocation functions 76.Ft struct mbuf * 77.Fn m_get "int how" "int type" 78.Ft struct mbuf * 79.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 80.Ft struct mbuf * 81.Fn m_getclr "int how" "int type" 82.Ft struct mbuf * 83.Fn m_gethdr "int how" "int type" 84.Ft struct mbuf * 85.Fn m_free "struct mbuf *mbuf" 86.Ft void 87.Fn m_freem "struct mbuf *mbuf" 88.\" 89.Ss Mbuf utility functions 90.Ft void 91.Fn m_adj "struct mbuf *mbuf" "int len" 92.Ft struct mbuf * 93.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 94.Ft struct mbuf * 95.Fn m_pullup "struct mbuf *mbuf" "int len" 96.Ft struct mbuf * 97.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int 
how" 98.Ft struct mbuf * 99.Fn m_copypacket "struct mbuf *mbuf" "int how" 100.Ft struct mbuf * 101.Fn m_dup "struct mbuf *mbuf" "int how" 102.Ft void 103.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 104.Ft void 105.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 106.Ft struct mbuf * 107.Fo m_devget 108.Fa "char *buf" 109.Fa "int len" 110.Fa "int offset" 111.Fa "struct ifnet *ifp" 112.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 113.Fc 114.Ft void 115.Fn m_cat "struct mbuf *m" "struct mbuf *n" 116.Ft u_int 117.Fn m_fixhdr "struct mbuf *mbuf" 118.Ft u_int 119.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 120.Ft struct mbuf * 121.Fn m_split "struct mbuf *mbuf" "int len" "int how" 122.\" 123.Sh DESCRIPTION 124An mbuf is a basic unit of memory management in the kernel IPC subsystem. 125Network packets and socket buffers are stored in mbufs. 126A network packet may span multiple mbufs arranged into a chain 127(linked list), 128which allows adding or trimming 129network headers with little overhead. 130.Pp 131While a developer should not bother with mbuf internals without serious 132reason in order to avoid incompatibilities with future changes, it 133is useful to understand the mbuf's general structure. 134.Pp 135An mbuf consists of a variable-sized header and a small internal 136buffer for data. 137The mbuf's total size, 138.Dv MSIZE , 139is a machine-dependent constant defined in 140.Pa machine/param.h . 
141The mbuf header includes: 142.Pp 143.Bl -tag -width "m_nextpkt" -compact -offset indent 144.It Fa m_next 145a pointer to the next buffer in the chain 146.It Fa m_nextpkt 147a pointer to the next chain in the queue 148.It Fa m_data 149a pointer to the data 150.It Fa m_len 151the length of the data 152.It Fa m_type 153the type of data 154.It Fa m_flags 155the mbuf flags 156.El 157.Pp 158The mbuf flag bits are defined as follows: 159.Bd -literal 160/* mbuf flags */ 161#define M_EXT 0x0001 /* has associated external storage */ 162#define M_PKTHDR 0x0002 /* start of record */ 163#define M_EOR 0x0004 /* end of record */ 164#define M_RDONLY 0x0008 /* associated data marked read-only */ 165#define M_PROTO1 0x0010 /* protocol-specific */ 166#define M_PROTO2 0x0020 /* protocol-specific */ 167#define M_PROTO3 0x0040 /* protocol-specific */ 168#define M_PROTO4 0x0080 /* protocol-specific */ 169#define M_PROTO5 0x0100 /* protocol-specific */ 170 171/* mbuf pkthdr flags, also in m_flags */ 172#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 173#define M_MCAST 0x0400 /* send/received as link-level multicast */ 174#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 175#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 176#define M_LASTFRAG 0x2000 /* packet is last fragment */ 177.Ed 178.Pp 179The available mbuf types are defined as follows: 180.Bd -literal 181/* mbuf types */ 182#define MT_FREE 0 /* should be on free list */ 183#define MT_DATA 1 /* dynamic (data) allocation */ 184#define MT_HEADER 2 /* packet header */ 185#define MT_SONAME 8 /* socket name */ 186#define MT_FTABLE 11 /* fragment reassembly header */ 187#define MT_CONTROL 14 /* extra-data protocol message */ 188#define MT_OOBDATA 15 /* expedited data */ 189.Ed 190.Pp 191If the 192.Dv M_PKTHDR 193flag is set, a 194.Li struct pkthdr m_pkthdr 195is added to the mbuf header. 
196It contains a pointer to the interface 197the packet has been received from 198.Pq Fa struct ifnet *rcvif , 199and the total packet length 200.Pq Fa int len . 201.Pp 202If small enough, data is stored in the mbuf's internal data buffer. 203If the data is sufficiently large, another mbuf may be added to the chain, 204or external storage may be associated with the mbuf. 205.Dv MHLEN 206bytes of data can fit into an mbuf with the 207.Dv M_PKTHDR 208flag set, 209.Dv MLEN 210bytes can otherwise. 211.Pp 212If external storage is being associated with an mbuf, the 213.Dv m_ext 214header is added at the cost of losing the internal data buffer. 215It includes a pointer to external storage, the size of the storage, 216a pointer to a function used for freeing the storage, 217a pointer to an optional argument that can be passed to the function, 218and a pointer to a reference counter. 219An mbuf using external storage has the 220.Dv M_EXT 221flag set. 222.Pp 223The system supplies a macro for allocating the desired external storage 224buffer, 225.Dv MEXTADD . 226.Pp 227The allocation and management of the reference counter is handled by the 228subsystem. 229The developer can check whether the reference count for the 230given mbuf's external storage is greater than 1 with the 231.Dv MEXT_IS_REF 232macro. 233Similarly, the developer can directly add and remove references, 234if absolutely necessary, with the use of the 235.Dv MEXT_ADD_REF 236and 237.Dv MEXT_REM_REF 238macros. 239.Pp 240The system also supplies a default type of external storage buffer called an 241.Dq mbuf cluster . 242Mbuf clusters can be allocated and configured with the use of the 243.Dv MCLGET 244macro. 245Each cluster is 246.Dv MCLBYTES 247in size, where MCLBYTES is a machine-dependent constant. 248The system defines an advisory macro 249.Dv MINCLSIZE , 250which is the smallest amount of data to put into a cluster. 251It's equal to the sum of 252.Dv MLEN 253and 254.Dv MHLEN . 
255It is typically preferable to store data into an mbuf's data region, if size 256permits, as opposed to allocating a separate mbuf cluster to hold the same 257data. 258.\" 259.Ss Macros and Functions 260There are numerous predefined macros and functions that provide the 261developer with common utilities. 262.\" 263.Bl -ohang -offset indent 264.It Fn mtod mbuf type 265Convert an mbuf pointer to a data pointer. 266The macro expands to the data pointer cast to the pointer of the specified type. 267.Sy Note : 268It is advisable to ensure that there is enough contiguous data in the mbuf. 269See 270.Fn m_pullup 271for details. 272.It Fn MGET mbuf how type 273Allocate an mbuf and initialize it to contain internal data. 274.Fa mbuf 275will point to the allocated mbuf on success, or be set to 276.Dv NULL 277on failure. 278The 279.Fa how 280argument is to be set to 281.Dv M_TRYWAIT 282or 283.Dv M_DONTWAIT . 284It specifies whether the caller is willing to block if necessary. 285If 286.Fa how 287is set to 288.Dv M_TRYWAIT , 289a failed allocation will result in the caller being put 290to sleep for a designated 291kern.ipc.mbuf_wait 292.Xr ( sysctl 8 293tunable) 294number of ticks. 295A number of other mbuf-related 296functions and macros have the same argument because they may 297at some point need to allocate new mbufs. 298.Pp 299Programmers should be careful not to confuse the mbuf allocation flag 300.Dv M_DONTWAIT 301with the 302.Xr malloc 9 303allocation flag, 304.Dv M_NOWAIT . 305They are not the same. 306.It Fn MGETHDR mbuf how type 307Allocate an mbuf and initialize it to contain a packet header 308and internal data. 309See 310.Fn MGET 311for details. 312.It Fn MCLGET mbuf how 313Allocate and attach an mbuf cluster to an mbuf. 314If the macro fails, the 315.Dv M_EXT 316flag won't be set in the mbuf. 317.It Fn M_PREPEND mbuf len how 318This macro operates on an mbuf chain. 
319It is an optimized wrapper for 320.Fn m_prepend 321that can make use of possible empty space before data 322(e.g. left after trimming of a link-layer header). 323The new chain pointer or 324.Dv NULL 325is in 326.Fa mbuf 327after the call. 328.It Fn M_WRITABLE mbuf 329This macro will evaluate true if the mbuf is not marked 330.Dv M_RDONLY 331and if either the mbuf does not contain external storage or, 332if it does, 333then if the reference count of the storage is not greater than 1. 334The 335.Dv M_RDONLY 336flag can be set in the mbuf's 337.Dv m_flags . 338This can be achieved during setup of the external storage, 339by passing the 340.Dv M_RDONLY 341bit as a 342.Fa flags 343argument to the 344.Fn MEXTADD 345macro, or can be directly set in individual mbufs. 346.El 347.Pp 348The functions are: 349.Bl -ohang -offset indent 350.It Fn m_get how type 351A function version of 352.Fn MGET 353for non-critical paths. 354.It Fn m_getm orig len how type 355Allocate 356.Fa len 357bytes worth of mbufs and mbuf clusters if necessary and append the resulting 358allocated chain to the 359.Fa orig 360mbuf chain, if it is 361.No non- Ns Dv NULL . 362If the allocation fails at any point, 363free whatever was allocated and return 364.Dv NULL . 365If 366.Fa orig 367is 368.No non- Ns Dv NULL , 369it will not be freed. 370It is possible to use 371.Fn m_getm 372to either append 373.Fa len 374bytes to an existing mbuf or mbuf chain 375(for example, one which may be sitting in a pre-allocated ring) 376or to simply perform an all-or-nothing mbuf and mbuf cluster allocation. 377.It Fn m_gethdr how type 378A function version of 379.Fn MGETHDR 380for non-critical paths. 381.It Fn m_getclr how type 382Allocate an mbuf and zero out the data region. 383.El 384.Pp 385The functions below operate on mbuf chains. 386.Bl -ohang -offset indent 387.It Fn m_freem mbuf 388Free an entire mbuf chain, including any external 389storage. 
390.\" 391.It Fn m_adj mbuf len 392Trim 393.Fa len 394bytes from the head of an mbuf chain if 395.Fa len 396is positive, from the tail otherwise. 397.\" 398.It Fn m_prepend mbuf len how 399Allocate a new mbuf and prepend it to the chain, handling 400.Dv M_PKTHDR 401properly. 402.Sy Note : 403It doesn't allocate any clusters, so 404.Fa len 405must be less than 406.Dv MLEN 407or 408.Dv MHLEN , 409depending on the 410.Dv M_PKTHDR 411flag setting. 412.\" 413.It Fn m_pullup mbuf len 414Arrange that the first 415.Fa len 416bytes of an mbuf chain are contiguous and lie in the data area of 417.Fa mbuf , 418so they are accessible with 419.Fn mtod mbuf type . 420Return the new chain on success, 421.Dv NULL 422on failure 423(the chain is freed in this case). 424.Sy Note : 425It doesn't allocate any clusters, so 426.Fa len 427must be less than 428.Dv MHLEN . 429.\" 430.It Fn m_copym mbuf offset len how 431Make a copy of an mbuf chain starting 432.Fa offset 433bytes from the beginning, continuing for 434.Fa len 435bytes. 436If 437.Fa len 438is 439.Dv M_COPYALL , 440copy to the end of the mbuf chain. 441.Sy Note : 442The copy is read-only, because clusters are not 443copied, only their reference counts are incremented. 444.\" 445.It Fn m_copypacket mbuf how 446Copy an entire packet including header, which must be present. 447This is an optimized version of the common case 448.Fn m_copym mbuf 0 M_COPYALL how . 449.Sy Note : 450The copy is read-only, because clusters are not 451copied, only their reference counts are incremented. 452.\" 453.It Fn m_dup mbuf how 454Copy a packet header mbuf chain into a completely new chain, including 455copying any mbuf clusters. 456Use this instead of 457.Fn m_copypacket 458when you need a writable copy of an mbuf chain. 459.\" 460.It Fn m_copydata mbuf offset len buf 461Copy data from an mbuf chain starting 462.Fa offset 463bytes from the beginning, continuing for 464.Fa len 465bytes, into the indicated buffer 466.Fa buf . 
467.\" 468.It Fn m_copyback mbuf offset len buf 469Copy 470.Fa len 471bytes from the buffer 472.Fa buf 473back into the indicated mbuf chain, 474starting at 475.Fa offset 476bytes from the beginning of the chain, extending the mbuf chain if necessary. 477.Sy Note : 478It doesn't allocate any clusters, just adds mbufs to the chain. 479It's safe to set 480.Fa offset 481beyond the current chain end: zeroed mbufs will be allocated to fill the 482space. 483.\" 484.It Fn m_length mbuf last 485Return the length of the mbuf chain, and optionally a pointer to the last mbuf. 486.\" 487.It Fn m_fixhdr mbuf 488Set the packet-header length to the length of the mbuf chain. 489.\" 490.It Fn m_devget buf len offset ifp copy 491Copy data from device-local memory pointed to by 492.Fa buf 493to an mbuf chain. 494The copy is done using a specified copy routine 495.Fa copy , 496or 497.Fn bcopy 498if 499.Fa copy 500is 501.Dv NULL . 502.\" 503.It Fn m_cat m n 504Concatenate 505.Fa n 506to 507.Fa m . 508Both chains must be of the same type. 509.Fa n 510is still valid after the function returns. 511.Sy Note : 512It does not handle 513.Dv M_PKTHDR 514and friends. 515.\" 516.It Fn m_split mbuf len how 517Partition an mbuf chain in two pieces, returning the tail: 518all but the first 519.Fa len 520bytes. 521In case of failure, it returns 522.Dv NULL 523and attempts to restore the chain to its original state. 524.El 525.Sh STRESS TESTING 526When running a kernel compiled with the option 527.Dv MBUF_STRESS_TEST , 528the following 529.Xr sysctl 8 Ns 530-controlled options may be used to create 531various failure/extreme cases for testing of network drivers 532and other mbuf-reliant parts of the kernel. 533.Bl -tag -width ident 534.It Va net.inet.ip.mbuf_frag_size 535Causes 536.Fn ip_output 537to fragment outgoing mbuf chains into fragments of the specified size. 538Setting this variable to 1 is an excellent way to 539test the long mbuf chain handling ability of network drivers. 
540.It Va kern.ipc.m_defragrandomfailures 541Causes the function 542.Fn m_defrag 543to randomly fail, returning 544.Dv NULL . 545Any piece of code which uses 546.Fn m_defrag 547should be tested with this feature. 548.El 549.Sh RETURN VALUES 550See above. 551.Sh HISTORY 552.\" Please correct me if I'm wrong 553Mbufs appeared in an early version of 554.Bx . 555Besides being used for network packets, they were used 556to store various dynamic structures, such as routing table 557entries, interface addresses, protocol control blocks, etc. 558.Sh AUTHORS 559The original 560.Nm 561man page was written by Yar Tikhiy. 562