1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd October 17, 2000 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Ft void * 60.Fn mtod "struct mbuf *mbuf" "type" 61.Ft int 62.Fn MEXT_IS_REF "struct mbuf *mbuf" 63.Fn M_COPY_PKTHDR "struct mbuf *to" "struct mbuf *from" 64.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 65.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 66.Ft int 67.Fn M_LEADINGSPACE "struct mbuf *mbuf" 68.Ft int 69.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 70.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 71.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 72.Ft int 73.Fn M_WRITABLE "struct mbuf *mbuf" 74.\" 75.Ss Mbuf allocation functions 76.Ft struct mbuf * 77.Fn m_get "int how" "int type" 78.Ft struct mbuf * 79.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 80.Ft struct mbuf * 81.Fn m_getclr "int how" "int type" 82.Ft struct mbuf * 83.Fn m_gethdr "int how" "int type" 84.Ft struct mbuf * 85.Fn m_free "struct mbuf *mbuf" 86.Ft void 87.Fn m_freem "struct mbuf *mbuf" 88.\" 89.Ss Mbuf utility functions 90.Ft void 91.Fn m_adj "struct mbuf *mbuf" "int len" 92.Ft struct mbuf * 93.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 94.Ft struct mbuf * 95.Fn m_pullup "struct mbuf *mbuf" "int len" 96.Ft struct mbuf * 97.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int 
how" 98.Ft struct mbuf * 99.Fn m_copypacket "struct mbuf *mbuf" "int how" 100.Ft struct mbuf * 101.Fn m_dup "struct mbuf *mbuf" "int how" 102.Ft void 103.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 104.Ft void 105.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 106.Ft struct mbuf * 107.Fo m_devget 108.Fa "char *buf" 109.Fa "int len" 110.Fa "int offset" 111.Fa "struct ifnet *ifp" 112.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 113.Fc 114.Ft void 115.Fn m_cat "struct mbuf *m" "struct mbuf *n" 116.Ft u_int 117.Fn m_fixhdr "struct mbuf *mbuf" 118.Ft u_int 119.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 120.Ft struct mbuf * 121.Fn m_split "struct mbuf *mbuf" "int len" "int how" 122.\" 123.Sh DESCRIPTION 124An mbuf is a basic unit of memory management in the kernel IPC subsystem. 125Network packets and socket buffers are stored in mbufs. 126A network packet may span multiple mbufs arranged into a chain 127(linked list), 128which allows adding or trimming 129network headers with little overhead. 130.Pp 131While a developer should not bother with mbuf internals without serious 132reason in order to avoid incompatibilities with future changes, it 133is useful to understand the mbuf's general structure. 134.Pp 135An mbuf consists of a variable-sized header and a small internal 136buffer for data. 137The mbuf's total size, 138.Dv MSIZE , 139is a machine-dependent constant defined in 140.Pa machine/param.h . 
141The mbuf header includes: 142.Pp 143.Bl -tag -width "m_nextpkt" -compact -offset indent 144.It Fa m_next 145a pointer to the next buffer in the chain 146.It Fa m_nextpkt 147a pointer to the next chain in the queue 148.It Fa m_data 149a pointer to the data 150.It Fa m_len 151the length of the data 152.It Fa m_type 153the type of data 154.It Fa m_flags 155the mbuf flags 156.El 157.Pp 158The mbuf flag bits are defined as follows: 159.Bd -literal 160/* mbuf flags */ 161#define M_EXT 0x0001 /* has associated external storage */ 162#define M_PKTHDR 0x0002 /* start of record */ 163#define M_EOR 0x0004 /* end of record */ 164#define M_RDONLY 0x0008 /* associated data marked read-only */ 165#define M_PROTO1 0x0010 /* protocol-specific */ 166#define M_PROTO2 0x0020 /* protocol-specific */ 167#define M_PROTO3 0x0040 /* protocol-specific */ 168#define M_PROTO4 0x0080 /* protocol-specific */ 169#define M_PROTO5 0x0100 /* protocol-specific */ 170 171/* mbuf pkthdr flags, also in m_flags */ 172#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 173#define M_MCAST 0x0400 /* send/received as link-level multicast */ 174#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 175#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 176#define M_LASTFRAG 0x2000 /* packet is last fragment */ 177.Ed 178.Pp 179The available mbuf types are defined as follows: 180.Bd -literal 181/* mbuf types */ 182#define MT_FREE 0 /* should be on free list */ 183#define MT_DATA 1 /* dynamic (data) allocation */ 184#define MT_HEADER 2 /* packet header */ 185#define MT_SONAME 8 /* socket name */ 186#define MT_FTABLE 11 /* fragment reassembly header */ 187#define MT_CONTROL 14 /* extra-data protocol message */ 188#define MT_OOBDATA 15 /* expedited data */ 189.Ed 190.Pp 191If the 192.Dv M_PKTHDR 193flag is set, a 194.Li struct pkthdr m_pkthdr 195is added to the mbuf header. 
196It contains a pointer to the interface 197the packet has been received from 198.Pq Fa struct ifnet *rcvif , 199and the total packet length 200.Pq Fa int len . 201.Pp 202If small enough, data is stored in the mbuf's internal data buffer. 203If the data is sufficiently large, another mbuf may be added to the chain, 204or external storage may be associated with the mbuf. 205.Dv MHLEN 206bytes of data can fit into an mbuf with the 207.Dv M_PKTHDR 208flag set, 209.Dv MLEN 210bytes can otherwise. 211.Pp 212If external storage is being associated with an mbuf, the 213.Dv m_ext 214header is added at the cost of losing the internal data buffer. 215It includes a pointer to external storage, the size of the storage, 216a pointer to a function used for freeing the storage, 217a pointer to an optional argument that can be passed to the function, 218and a pointer to a reference counter. 219An mbuf using external storage has the 220.Dv M_EXT 221flag set. 222.Pp 223The system supplies a macro for allocating the desired external storage 224buffer, 225.Dv MEXTADD . 226.Pp 227The allocation and management of the reference counter is handled by the 228subsystem. 229The developer can check whether the reference count for the 230given mbuf's external storage is greater than 1 with the 231.Dv MEXT_IS_REF 232macro. 233Similarly, the developer can directly add and remove references, 234if absolutely necessary, with the use of the 235.Dv MEXT_ADD_REF 236and 237.Dv MEXT_REM_REF 238macros. 239.Pp 240The system also supplies a default type of external storage buffer called an 241.Dq mbuf cluster . 242Mbuf clusters can be allocated and configured with the use of the 243.Dv MCLGET 244macro. 245Each cluster is 246.Dv MCLBYTES 247in size, where MCLBYTES is a machine-dependent constant. 248The system defines an advisory macro 249.Dv MINCLSIZE , 250which is the smallest amount of data to put into a cluster. 251It's equal to the sum of 252.Dv MLEN 253and 254.Dv MHLEN . 
255It is typically preferable to store data into an mbuf's data region, if size 256permits, as opposed to allocating a separate mbuf cluster to hold the same 257data. 258.\" 259.Ss Macros and Functions 260There are numerous predefined macros and functions that provide the 261developer with common utilities. 262.\" 263.Bl -ohang -offset indent 264.It Fn mtod mbuf type 265Convert an mbuf pointer to a data pointer. 266The macro expands to the data pointer cast to the specified type. 267.Sy Note : 268It is advisable to ensure that there is enough contiguous data in the mbuf. 269See 270.Fn m_pullup 271for details. 272.It Fn MGET mbuf how type 273Allocate an mbuf and initialize it to contain internal data. 274.Fa mbuf 275will point to the allocated mbuf on success, or be set to 276.Dv NULL 277on failure. 278The 279.Fa how 280argument is to be set to 281.Dv 0 282or 283.Dv M_NOWAIT . 284It specifies whether the caller is willing to block if necessary. 285If 286.Fa how 287is set to 288.Dv 0 , 289a failed allocation will result in the caller being put 290to sleep for a designated 291.Va kern.ipc.mbuf_wait 292.Pq Xr sysctl 8 No tunable 293number of ticks. 295A number of other mbuf-related 296functions and macros have the same argument because they may 297at some point need to allocate new mbufs. 298.It Fn MGETHDR mbuf how type 299Allocate an mbuf and initialize it to contain a packet header 300and internal data. 301See 302.Fn MGET 303for details. 304.It Fn MCLGET mbuf how 305Allocate and attach an mbuf cluster to an mbuf. 306If the macro fails, the 307.Dv M_EXT 308flag won't be set in the mbuf. 309.It Fn M_PREPEND mbuf len how 310This macro operates on an mbuf chain. 311It is an optimized wrapper for 312.Fn m_prepend 313that can make use of possible empty space before data 314(e.g. left after trimming of a link-layer header). 315The new chain pointer or 316.Dv NULL 317is in 318.Fa mbuf 319after the call. 
320.It Fn M_WRITABLE mbuf 321This macro will evaluate true if the mbuf is not marked 322.Dv M_RDONLY 323and if either the mbuf does not contain external storage or, 324if it does, 325then if the reference count of the storage is not greater than 1. 326The 327.Dv M_RDONLY 328flag can be set in the mbuf's 329.Dv m_flags . 330This can be achieved during setup of the external storage, 331by passing the 332.Dv M_RDONLY 333bit as a 334.Fa flags 335argument to the 336.Fn MEXTADD 337macro, or can be directly set in individual mbufs. 338.El 339.Pp 340The functions are: 341.Bl -ohang -offset indent 342.It Fn m_get how type 343A function version of 344.Fn MGET 345for non-critical paths. 346.It Fn m_getm orig len how type 347Allocate 348.Fa len 349bytes worth of mbufs and mbuf clusters if necessary and append the resulting 350allocated chain to the 351.Fa orig 352mbuf chain, if it is 353.No non- Ns Dv NULL . 354If the allocation fails at any point, 355free whatever was allocated and return 356.Dv NULL . 357If 358.Fa orig 359is 360.No non- Ns Dv NULL , 361it will not be freed. 362It is possible to use 363.Fn m_getm 364to either append 365.Fa len 366bytes to an existing mbuf or mbuf chain 367(for example, one which may be sitting in a pre-allocated ring) 368or to simply perform an all-or-nothing mbuf and mbuf cluster allocation. 369.It Fn m_gethdr how type 370A function version of 371.Fn MGETHDR 372for non-critical paths. 373.It Fn m_getclr how type 374Allocate an mbuf and zero out the data region. 375.El 376.Pp 377The functions below operate on mbuf chains. 378.Bl -ohang -offset indent 379.It Fn m_freem mbuf 380Free an entire mbuf chain, including any external 381storage. 382.\" 383.It Fn m_adj mbuf len 384Trim 385.Fa len 386bytes from the head of an mbuf chain if 387.Fa len 388is positive, from the tail otherwise. 389.\" 390.It Fn m_prepend mbuf len how 391Allocate a new mbuf and prepend it to the chain, handle 392.Dv M_PKTHDR 393properly. 
394.Sy Note : 395It doesn't allocate any clusters, so 396.Fa len 397must be less than 398.Dv MLEN 399or 400.Dv MHLEN , 401depending on the 402.Dv M_PKTHDR 403flag setting. 404.\" 405.It Fn m_pullup mbuf len 406Arrange that the first 407.Fa len 408bytes of an mbuf chain are contiguous and lie in the data area of 409.Fa mbuf , 410so they are accessible with 411.Fn mtod mbuf type . 412Return the new chain on success, 413.Dv NULL 414on failure 415(the chain is freed in this case). 416.Sy Note : 417It doesn't allocate any clusters, so 418.Fa len 419must be less than 420.Dv MHLEN . 421.\" 422.It Fn m_copym mbuf offset len how 423Make a copy of an mbuf chain starting 424.Fa offset 425bytes from the beginning, continuing for 426.Fa len 427bytes. 428If 429.Fa len 430is 431.Dv M_COPYALL , 432copy to the end of the mbuf chain. 433.Sy Note : 434The copy is read-only, because clusters are not 435copied; only their reference counts are incremented. 436.\" 437.It Fn m_copypacket mbuf how 438Copy an entire packet including header, which must be present. 439This is an optimized version of the common case 440.Fn m_copym mbuf 0 M_COPYALL how . 441.Sy Note : 442The copy is read-only, because clusters are not 443copied; only their reference counts are incremented. 444.\" 445.It Fn m_dup mbuf how 446Copy a packet header mbuf chain into a completely new chain, including 447copying any mbuf clusters. 448Use this instead of 449.Fn m_copypacket 450when you need a writable copy of an mbuf chain. 451.\" 452.It Fn m_copydata mbuf offset len buf 453Copy data from an mbuf chain starting 454.Fa offset 455bytes from the beginning, continuing for 456.Fa len 457bytes, into the indicated buffer 458.Fa buf . 459.\" 460.It Fn m_copyback mbuf offset len buf 461Copy 462.Fa len 463bytes from the buffer 464.Fa buf 465back into the indicated mbuf chain, 466starting at 467.Fa offset 468bytes from the beginning of the chain, extending the mbuf chain if necessary. 
469.Sy Note : 470It doesn't allocate any clusters, just adds mbufs to the chain. 471It's safe to set 472.Fa offset 473beyond the current chain end: zeroed mbufs will be allocated to fill the 474space. 475.\" 476.It Fn m_length mbuf last 477Return the length of the mbuf chain, and optionally a pointer to the last mbuf. 478.\" 479.It Fn m_fixhdr mbuf 480Set the packet-header length to the length of the mbuf chain. 481.\" 482.It Fn m_devget buf len offset ifp copy 483Copy data from device-local memory pointed to by 484.Fa buf 485to an mbuf chain. 486The copy is done using a specified copy routine 487.Fa copy , 488or 489.Fn bcopy 490if 491.Fa copy 492is 493.Dv NULL . 494.\" 495.It Fn m_cat m n 496Concatenate 497.Fa n 498to 499.Fa m . 500Both chains must be of the same type. 501.Fa n 502is still valid after the function returns. 503.Sy Note : 504It does not handle 505.Dv M_PKTHDR 506and friends. 507.\" 508.It Fn m_split mbuf len how 509Partition an mbuf chain into two pieces, returning the tail: 510all but the first 511.Fa len 512bytes. 513In case of failure, it returns 514.Dv NULL 515and attempts to restore the chain to its original state. 516.El 517.Sh RETURN VALUES 518See above. 519.Sh HISTORY 520.\" Please correct me if I'm wrong 521Mbufs appeared in an early version of 522.Bx . 523Besides being used for network packets, they were used 524to store various dynamic structures, such as routing table 525entries, interface addresses, protocol control blocks, etc. 526.Sh AUTHORS 527The original 528.Nm 529man page was written by Yar Tikhiy. 530