1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd October 17, 2000 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Ft void * 60.Fn mtod "struct mbuf *mbuf" "type" 61.Ft int 62.Fn MEXT_IS_REF "struct mbuf *mbuf" 63.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 64.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 65.Ft int 66.Fn M_LEADINGSPACE "struct mbuf *mbuf" 67.Ft int 68.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 69.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 70.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 71.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 72.Ft int 73.Fn M_WRITABLE "struct mbuf *mbuf" 74.\" 75.Ss Mbuf allocation functions 76.Ft struct mbuf * 77.Fn m_get "int how" "int type" 78.Ft struct mbuf * 79.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 80.Ft struct mbuf * 81.Fn m_getclr "int how" "int type" 82.Ft struct mbuf * 83.Fn m_gethdr "int how" "int type" 84.Ft struct mbuf * 85.Fn m_free "struct mbuf *mbuf" 86.Ft void 87.Fn m_freem "struct mbuf *mbuf" 88.\" 89.Ss Mbuf utility functions 90.Ft void 91.Fn m_adj "struct mbuf *mbuf" "int len" 92.Ft struct mbuf * 93.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 94.Ft struct mbuf * 95.Fn m_pullup "struct mbuf *mbuf" "int len" 96.Ft struct mbuf * 97.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int 
how" 98.Ft struct mbuf * 99.Fn m_copypacket "struct mbuf *mbuf" "int how" 100.Ft struct mbuf * 101.Fn m_dup "struct mbuf *mbuf" "int how" 102.Ft void 103.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 104.Ft void 105.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 106.Ft struct mbuf * 107.Fo m_devget 108.Fa "char *buf" 109.Fa "int len" 110.Fa "int offset" 111.Fa "struct ifnet *ifp" 112.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 113.Fc 114.Ft void 115.Fn m_cat "struct mbuf *m" "struct mbuf *n" 116.Ft u_int 117.Fn m_fixhdr "struct mbuf *mbuf" 118.Ft void 119.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" 120.Ft void 121.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 122.Ft u_int 123.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 124.Ft struct mbuf * 125.Fn m_split "struct mbuf *mbuf" "int len" "int how" 126.\" 127.Sh DESCRIPTION 128An 129.Vt mbuf 130is a basic unit of memory management in the kernel IPC subsystem. 131Network packets and socket buffers are stored in 132.Vt mbufs . 133A network packet may span multiple 134.Vt mbufs 135arranged into a 136.Vt mbuf chain 137(linked list), 138which allows adding or trimming 139network headers with little overhead. 140.Pp 141While a developer should not bother with 142.Vt mbuf 143internals without serious 144reason in order to avoid incompatibilities with future changes, it 145is useful to understand the general structure of an 146.Vt mbuf . 147.Pp 148An 149.Vt mbuf 150consists of a variable-sized header and a small internal 151buffer for data. 152The total size of an 153.Vt mbuf , 154.Dv MSIZE , 155is a machine-dependent constant defined in 156.Pa machine/param.h . 
157The 158.Vt mbuf 159header includes: 160.Pp 161.Bl -tag -width "m_nextpkt" -compact -offset indent 162.It Va m_next 163a pointer to the next 164.Vt mbuf 165in the 166.Vt mbuf chain 167.It Va m_nextpkt 168a pointer to the next 169.Vt mbuf chain 170in the queue 171.It Va m_data 172a pointer to the data 173.It Va m_len 174the length of the data 175.It Va m_type 176the type of data 177.It Va m_flags 178the 179.Vt mbuf 180flags 181.El 182.Pp 183The 184.Vt mbuf 185flag bits are defined as follows: 186.Bd -literal 187/* mbuf flags */ 188#define M_EXT 0x0001 /* has associated external storage */ 189#define M_PKTHDR 0x0002 /* start of record */ 190#define M_EOR 0x0004 /* end of record */ 191#define M_RDONLY 0x0008 /* associated data marked read-only */ 192#define M_PROTO1 0x0010 /* protocol-specific */ 193#define M_PROTO2 0x0020 /* protocol-specific */ 194#define M_PROTO3 0x0040 /* protocol-specific */ 195#define M_PROTO4 0x0080 /* protocol-specific */ 196#define M_PROTO5 0x0100 /* protocol-specific */ 197 198/* mbuf pkthdr flags, also in m_flags */ 199#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 200#define M_MCAST 0x0400 /* send/received as link-level multicast */ 201#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 202#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 203#define M_LASTFRAG 0x2000 /* packet is last fragment */ 204.Ed 205.Pp 206The available 207.Vt mbuf 208types are defined as follows: 209.Bd -literal 210/* mbuf types */ 211#define MT_FREE 0 /* should be on free list */ 212#define MT_DATA 1 /* dynamic (data) allocation */ 213#define MT_HEADER 2 /* packet header */ 214#define MT_SONAME 8 /* socket name */ 215#define MT_FTABLE 11 /* fragment reassembly header */ 216#define MT_CONTROL 14 /* extra-data protocol message */ 217#define MT_OOBDATA 15 /* expedited data */ 218.Ed 219.Pp 220If the 221.Dv M_PKTHDR 222flag is set, a 223.Vt struct pkthdr Va m_pkthdr 224is added to the 225.Vt mbuf 226header. 
227It contains a pointer to the interface 228the packet has been received from 229.Pq Vt struct ifnet Va *rcvif , 230and the total packet length 231.Pq Vt int Va len . 232.Pp 233If small enough, data is stored in the internal data buffer of an 234.Vt mbuf . 235If the data is sufficiently large, another 236.Vt mbuf 237may be added to the 238.Vt mbuf chain , 239or external storage may be associated with the 240.Vt mbuf . 241.Dv MHLEN 242bytes of data can fit into an 243.Vt mbuf 244with the 245.Dv M_PKTHDR 246flag set, 247.Dv MLEN 248bytes can otherwise. 249.Pp 250If external storage is being associated with an 251.Vt mbuf , 252the 253.Va m_ext 254header is added at the cost of losing the internal data buffer. 255It includes a pointer to external storage, the size of the storage, 256a pointer to a function used for freeing the storage, 257a pointer to an optional argument that can be passed to the function, 258and a pointer to a reference counter. 259An 260.Vt mbuf 261using external storage has the 262.Dv M_EXT 263flag set. 264.Pp 265The system supplies a macro for allocating the desired external storage 266buffer, 267.Dv MEXTADD . 268.Pp 269The allocation and management of the reference counter is handled by the 270subsystem. 271The developer can check whether the reference count for the 272external storage of a given 273.Vt mbuf 274is greater than 1 with the 275.Dv MEXT_IS_REF 276macro. 277Similarly, the developer can directly add and remove references, 278if absolutely necessary, with the use of the 279.Dv MEXT_ADD_REF 280and 281.Dv MEXT_REM_REF 282macros. 283.Pp 284The system also supplies a default type of external storage buffer called an 285.Vt mbuf cluster . 286.Vt Mbuf clusters 287can be allocated and configured with the use of the 288.Dv MCLGET 289macro. 290Each 291.Vt mbuf cluster 292is 293.Dv MCLBYTES 294in size, where MCLBYTES is a machine-dependent constant. 
295The system defines an advisory macro 296.Dv MINCLSIZE , 297which is the smallest amount of data to put into an 298.Vt mbuf cluster . 299It's equal to the sum of 300.Dv MLEN 301and 302.Dv MHLEN . 303It is typically preferable to store data into the data region of an 304.Vt mbuf , 305if size permits, as opposed to allocating a separate 306.Vt mbuf cluster 307to hold the same data. 308.\" 309.Ss Macros and Functions 310There are numerous predefined macros and functions that provide the 311developer with common utilities. 312.\" 313.Bl -ohang -offset indent 314.It Fn mtod mbuf type 315Convert an 316.Fa mbuf 317pointer to a data pointer. 318The macro expands to the data pointer cast to the pointer of the specified 319.Fa type . 320.Sy Note : 321It is advisable to ensure that there is enough contiguous data in 322.Fa mbuf . 323See 324.Fn m_pullup 325for details. 326.It Fn MGET mbuf how type 327Allocate an 328.Vt mbuf 329and initialize it to contain internal data. 330.Fa mbuf 331will point to the allocated 332.Vt mbuf 333on success, or be set to 334.Dv NULL 335on failure. 336The 337.Fa how 338argument is to be set to 339.Dv M_TRYWAIT 340or 341.Dv M_DONTWAIT . 342It specifies whether the caller is willing to block if necessary. 343If 344.Fa how 345is set to 346.Dv M_TRYWAIT , 347a failed allocation will result in the caller being put 348to sleep for a designated 349kern.ipc.mbuf_wait 350.Xr ( sysctl 8 351tunable) 352number of ticks. 353A number of other functions and macros related to 354.Vt mbufs 355have the same argument because they may 356at some point need to allocate new 357.Vt mbufs . 358.Pp 359Programmers should be careful not to confuse the 360.Vt mbuf 361allocation flag 362.Dv M_DONTWAIT 363with the 364.Xr malloc 9 365allocation flag, 366.Dv M_NOWAIT . 367They are not the same. 368.It Fn MGETHDR mbuf how type 369Allocate an 370.Vt mbuf 371and initialize it to contain a packet header 372and internal data. 373See 374.Fn MGET 375for details. 
376.It Fn MCLGET mbuf how 377Allocate and attach an 378.Vt mbuf cluster 379to 380.Fa mbuf . 381If the macro fails, the 382.Dv M_EXT 383flag won't be set in 384.Fa mbuf . 385.It Fn M_ALIGN mbuf len 386Set the pointer 387.Fa mbuf->m_data 388to place an object of the size 389.Fa len 390at the end of the internal data area of 391.Fa mbuf , 392long word aligned. 393Applicable only if 394.Fa mbuf 395is newly allocated with 396.Fn MGET 397or 398.Fn m_get . 399.It Fn MH_ALIGN mbuf len 400Serves the same purpose as 401.Fn M_ALIGN 402does, but only for 403.Fa mbuf 404newly allocated with 405.Fn MGETHDR 406or 407.Fn m_gethdr , 408or initialized by 409.Fn m_dup_pkthdr 410or 411.Fn m_move_pkthdr . 412.It Fn M_LEADINGSPACE mbuf 413Returns the number of bytes available before the beginning 414of data in 415.Fa mbuf . 416.It Fn M_TRAILINGSPACE mbuf 417Returns the number of bytes available after the end of data in 418.Fa mbuf . 419.It Fn M_PREPEND mbuf len how 420This macro operates on an 421.Vt mbuf chain . 422It is an optimized wrapper for 423.Fn m_prepend 424that can make use of possible empty space before data 425(e.g. left after trimming of a link-layer header). 426The new 427.Vt mbuf chain 428pointer or 429.Dv NULL 430is in 431.Fa mbuf 432after the call. 433.It Fn M_MOVE_PKTHDR to from 434Using this macro is equivalent to calling 435.Fn m_move_pkthdr to from . 436.It Fn M_WRITABLE mbuf 437This macro will evaluate true if 438.Fa mbuf 439is not marked 440.Dv M_RDONLY 441and if either 442.Fa mbuf 443does not contain external storage or, 444if it does, 445then if the reference count of the storage is not greater than 1. 446The 447.Dv M_RDONLY 448flag can be set in 449.Fa mbuf->m_flags . 450This can be achieved during setup of the external storage, 451by passing the 452.Dv M_RDONLY 453bit as a 454.Fa flags 455argument to the 456.Fn MEXTADD 457macro, or can be directly set in individual 458.Vt mbufs . 459.It Fn MCHTYPE mbuf type 460Change the type of 461.Fa mbuf 462to 463.Fa type . 
464This is a relatively expensive operation and should be avoided. 465.El 466.Pp 467The functions are: 468.Bl -ohang -offset indent 469.It Fn m_get how type 470A function version of 471.Fn MGET 472for non-critical paths. 473.It Fn m_getm orig len how type 474Allocate 475.Fa len 476bytes worth of 477.Vt mbufs 478and 479.Vt mbuf clusters 480if necessary and append the resulting allocated 481.Vt mbuf chain 482to the 483.Vt mbuf chain 484.Fa orig , 485if it is 486.No non- Ns Dv NULL . 487If the allocation fails at any point, 488free whatever was allocated and return 489.Dv NULL . 490If 491.Fa orig 492is 493.No non- Ns Dv NULL , 494it will not be freed. 495It is possible to use 496.Fn m_getm 497to either append 498.Fa len 499bytes to an existing 500.Vt mbuf 501or 502.Vt mbuf chain 503(for example, one which may be sitting in a pre-allocated ring) 504or to simply perform an all-or-nothing 505.Vt mbuf 506and 507.Vt mbuf cluster 508allocation. 509.It Fn m_gethdr how type 510A function version of 511.Fn MGETHDR 512for non-critical paths. 513.It Fn m_getclr how type 514Allocate an 515.Vt mbuf 516and zero out the data region. 517.El 518.Pp 519The functions below operate on 520.Vt mbuf chains . 521.Bl -ohang -offset indent 522.It Fn m_freem mbuf 523Free an entire 524.Vt mbuf chain , 525including any external storage. 526.\" 527.It Fn m_adj mbuf len 528Trim 529.Fa len 530bytes from the head of an 531.Vt mbuf chain 532if 533.Fa len 534is positive, from the tail otherwise. 535.\" 536.It Fn m_prepend mbuf len how 537Allocate a new 538.Vt mbuf 539and prepend it to the 540.Vt mbuf chain , 541handle 542.Dv M_PKTHDR 543properly. 544.Sy Note : 545It doesn't allocate any 546.Vt mbuf clusters , 547so 548.Fa len 549must be less than 550.Dv MLEN 551or 552.Dv MHLEN , 553depending on the 554.Dv M_PKTHDR 555flag setting. 
556.\" 557.It Fn m_pullup mbuf len 558Arrange that the first 559.Fa len 560bytes of an 561.Vt mbuf chain 562are contiguous and lie in the data area of 563.Fa mbuf , 564so they are accessible with 565.Fn mtod mbuf type . 566Return the new 567.Vt mbuf chain 568on success, 569.Dv NULL 570on failure 571(the 572.Vt mbuf chain 573is freed in this case). 574.Sy Note : 575It doesn't allocate any 576.Vt mbuf clusters , 577so 578.Fa len 579must be less than 580.Dv MHLEN . 581.\" 582.It Fn m_copym mbuf offset len how 583Make a copy of an 584.Vt mbuf chain 585starting 586.Fa offset 587bytes from the beginning, continuing for 588.Fa len 589bytes. 590If 591.Fa len 592is 593.Dv M_COPYALL , 594copy to the end of the 595.Vt mbuf chain . 596.Sy Note : 597The copy is read-only, because the 598.Vt mbuf clusters 599are not copied, only their reference counts are incremented. 600.\" 601.It Fn m_copypacket mbuf how 602Copy an entire packet including header, which must be present. 603This is an optimized version of the common case 604.Fn m_copym mbuf 0 M_COPYALL how . 605.Sy Note : 606The copy is read-only, because the 607.Vt mbuf clusters 608are not copied, only their reference counts are incremented. 609.\" 610.It Fn m_dup mbuf how 611Copy a packet header 612.Vt mbuf chain 613into a completely new 614.Vt mbuf chain , 615including copying any 616.Vt mbuf clusters . 617Use this instead of 618.Fn m_copypacket 619when you need a writable copy of an 620.Vt mbuf chain . 621.\" 622.It Fn m_copydata mbuf offset len buf 623Copy data from an 624.Vt mbuf chain 625starting 626.Fa offset 627bytes from the beginning, continuing for 628.Fa len 629bytes, into the indicated buffer 630.Fa buf . 631.\" 632.It Fn m_copyback mbuf offset len buf 633Copy 634.Fa len 635bytes from the buffer 636.Fa buf 637back into the indicated 638.Vt mbuf chain , 639starting at 640.Fa offset 641bytes from the beginning of the 642.Vt mbuf chain , 643extending the 644.Vt mbuf chain 645if necessary. 
646.Sy Note : 647It doesn't allocate any 648.Vt mbuf clusters , 649just adds 650.Vt mbufs 651to the 652.Vt mbuf chain . 653It's safe to set 654.Fa offset 655beyond the current 656.Vt mbuf chain 657end: zeroed 658.Vt mbufs 659will be allocated to fill the space. 660.\" 661.It Fn m_length mbuf last 662Return the length of the 663.Vt mbuf chain , 664and optionally a pointer to the last 665.Vt mbuf . 666.\" 667.It Fn m_dup_pkthdr to from how 668Upon the function's completion, the 669.Vt mbuf 670.Fa to 671will contain an identical copy of 672.Fa from->m_pkthdr 673and the per-packet attributes found in the 674.Vt mbuf chain 675.Fa from . 676The 677.Vt mbuf 678.Fa from 679must have the flag 680.Dv M_PKTHDR 681initially set, and 682.Fa to 683must be empty on entry. 684.\" 685.It Fn m_move_pkthdr to from 686Move 687.Va m_pkthdr 688and the per-packet attributes from the 689.Vt mbuf chain 690.Fa from 691to the 692.Vt mbuf 693.Fa to . 694The 695.Vt mbuf 696.Fa from 697must have the flag 698.Dv M_PKTHDR 699initially set, and 700.Fa to 701must be empty on entry. 702Upon the function's completion, 703.Fa from 704will have the flag 705.Dv M_PKTHDR 706and the per-packet attributes cleared. 707.\" 708.It Fn m_fixhdr mbuf 709Set the packet-header length to the length of the 710.Vt mbuf chain . 711.\" 712.It Fn m_devget buf len offset ifp copy 713Copy data from device local memory pointed to by 714.Fa buf 715to an 716.Vt mbuf chain . 717The copy is done using a specified copy routine 718.Fa copy , 719or 720.Fn bcopy 721if 722.Fa copy 723is 724.Dv NULL . 725.\" 726.It Fn m_cat m n 727Concatenate 728.Fa n 729to 730.Fa m . 731Both 732.Vt mbuf chains 733must be of the same type. 734.Fa n 735is still valid after the function returns. 736.Sy Note : 737It does not handle 738.Dv M_PKTHDR 739and friends. 740.\" 741.It Fn m_split mbuf len how 742Partition an 743.Vt mbuf chain 744in two pieces, returning the tail: 745all but the first 746.Fa len 747bytes. 
748In case of failure, it returns 749.Dv NULL 750and attempts to restore the 751.Vt mbuf chain 752to its original state. 753.El 754.Sh STRESS TESTING 755When running a kernel compiled with the option 756.Dv MBUF_STRESS_TEST , 757the following 758.Xr sysctl 8 Ns 759-controlled options may be used to create 760various failure/extreme cases for testing of network drivers 761and other parts of the kernel that rely on 762.Vt mbufs . 763.Bl -tag -width indent 764.It Va net.inet.ip.mbuf_frag_size 765Causes 766.Fn ip_output 767to fragment outgoing 768.Vt mbuf chains 769into fragments of the specified size. 770Setting this variable to 1 is an excellent way to 771test the long 772.Vt mbuf chain 773handling ability of network drivers. 774.It Va kern.ipc.m_defragrandomfailures 775Causes the function 776.Fn m_defrag 777to randomly fail, returning 778.Dv NULL . 779Any piece of code which uses 780.Fn m_defrag 781should be tested with this feature. 782.El 783.Sh RETURN VALUES 784See above. 785.Sh HISTORY 786.\" Please correct me if I'm wrong 787.Vt Mbufs 788appeared in an early version of 789.Bx . 790Besides being used for network packets, they were used 791to store various dynamic structures, such as routing table 792entries, interface addresses, protocol control blocks, etc. 793.Sh AUTHORS 794The original 795.Nm 796man page was written by Yar Tikhiy. 797