1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd February 26, 2007 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_arg1, void *opt_arg2)" 49.Fa "void *opt_arg1" 50.Fa "void *opt_arg2" 51.Fa "short flags" 52.Fa "int type" 53.Fc 54.Fn MEXTFREE "struct mbuf *mbuf" 55.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 56.\" 57.Ss Mbuf utility macros 58.Fn mtod "struct mbuf *mbuf" "type" 59.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 60.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 61.Ft int 62.Fn M_LEADINGSPACE "struct mbuf *mbuf" 63.Ft int 64.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 65.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 66.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 67.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 68.Ft int 69.Fn M_WRITABLE "struct mbuf *mbuf" 70.\" 71.Ss Mbuf allocation functions 72.Ft struct mbuf * 73.Fn m_get "int how" "int type" 74.Ft struct mbuf * 75.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 76.Ft struct mbuf * 77.Fn m_getcl "int how" "short type" "int flags" 78.Ft struct mbuf * 79.Fn m_getclr "int how" "int type" 80.Ft struct mbuf * 81.Fn m_gethdr "int how" "int type" 82.Ft struct mbuf * 83.Fn m_free "struct mbuf *mbuf" 84.Ft void 85.Fn m_freem "struct mbuf *mbuf" 86.\" 87.Ss Mbuf utility functions 88.Ft void 89.Fn m_adj "struct mbuf *mbuf" "int len" 90.Ft void 91.Fn m_align "struct mbuf *mbuf" "int len" 92.Ft int 93.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 94.Ft struct mbuf * 95.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 96.Ft struct mbuf * 97.Fn m_copyup "struct mbuf 
*mbuf" "int len" "int dstoff" 98.Ft struct mbuf * 99.Fn m_pullup "struct mbuf *mbuf" "int len" 100.Ft struct mbuf * 101.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp" 102.Ft struct mbuf * 103.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 104.Ft struct mbuf * 105.Fn m_copypacket "struct mbuf *mbuf" "int how" 106.Ft struct mbuf * 107.Fn m_dup "struct mbuf *mbuf" "int how" 108.Ft void 109.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 110.Ft void 111.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 112.Ft struct mbuf * 113.Fo m_devget 114.Fa "char *buf" 115.Fa "int len" 116.Fa "int offset" 117.Fa "struct ifnet *ifp" 118.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 119.Fc 120.Ft void 121.Fn m_cat "struct mbuf *m" "struct mbuf *n" 122.Ft u_int 123.Fn m_fixhdr "struct mbuf *mbuf" 124.Ft int 125.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 126.Ft void 127.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 128.Ft u_int 129.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 130.Ft struct mbuf * 131.Fn m_split "struct mbuf *mbuf" "int len" "int how" 132.Ft int 133.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 134.Ft struct mbuf * 135.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 136.Ft struct mbuf * 137.Fn m_defrag "struct mbuf *m0" "int how" 138.Ft struct mbuf * 139.Fn m_unshare "struct mbuf *m0" "int how" 140.\" 141.Sh DESCRIPTION 142An 143.Vt mbuf 144is a basic unit of memory management in the kernel IPC subsystem. 145Network packets and socket buffers are stored in 146.Vt mbufs . 147A network packet may span multiple 148.Vt mbufs 149arranged into a 150.Vt mbuf chain 151(linked list), 152which allows adding or trimming 153network headers with little overhead. 
154.Pp 155While a developer should not bother with 156.Vt mbuf 157internals without serious 158reason in order to avoid incompatibilities with future changes, it 159is useful to understand the general structure of an 160.Vt mbuf . 161.Pp 162An 163.Vt mbuf 164consists of a variable-sized header and a small internal 165buffer for data. 166The total size of an 167.Vt mbuf , 168.Dv MSIZE , 169is a constant defined in 170.In sys/param.h . 171The 172.Vt mbuf 173header includes: 174.Pp 175.Bl -tag -width "m_nextpkt" -offset indent 176.It Va m_next 177.Pq Vt struct mbuf * 178A pointer to the next 179.Vt mbuf 180in the 181.Vt mbuf chain . 182.It Va m_nextpkt 183.Pq Vt struct mbuf * 184A pointer to the next 185.Vt mbuf chain 186in the queue. 187.It Va m_data 188.Pq Vt caddr_t 189A pointer to data attached to this 190.Vt mbuf . 191.It Va m_len 192.Pq Vt int 193The length of the data. 194.It Va m_type 195.Pq Vt short 196The type of the data. 197.It Va m_flags 198.Pq Vt int 199The 200.Vt mbuf 201flags. 
202.El 203.Pp 204The 205.Vt mbuf 206flag bits are defined as follows: 207.Bd -literal 208/* mbuf flags */ 209#define M_EXT 0x0001 /* has associated external storage */ 210#define M_PKTHDR 0x0002 /* start of record */ 211#define M_EOR 0x0004 /* end of record */ 212#define M_RDONLY 0x0008 /* associated data marked read-only */ 213#define M_PROTO1 0x0010 /* protocol-specific */ 214#define M_PROTO2 0x0020 /* protocol-specific */ 215#define M_PROTO3 0x0040 /* protocol-specific */ 216#define M_PROTO4 0x0080 /* protocol-specific */ 217#define M_PROTO5 0x0100 /* protocol-specific */ 218#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 219#define M_FREELIST 0x8000 /* mbuf is on the free list */ 220 221/* mbuf pkthdr flags (also stored in m_flags) */ 222#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 223#define M_MCAST 0x0400 /* send/received as link-level multicast */ 224#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 225#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 226#define M_LASTFRAG 0x2000 /* packet is last fragment */ 227.Ed 228.Pp 229The available 230.Vt mbuf 231types are defined as follows: 232.Bd -literal 233/* mbuf types */ 234#define MT_DATA 1 /* dynamic (data) allocation */ 235#define MT_HEADER MT_DATA /* packet header */ 236#define MT_SONAME 8 /* socket name */ 237#define MT_CONTROL 14 /* extra-data protocol message */ 238#define MT_OOBDATA 15 /* expedited data */ 239.Ed 240.Pp 241If the 242.Dv M_PKTHDR 243flag is set, a 244.Vt struct pkthdr Va m_pkthdr 245is added to the 246.Vt mbuf 247header. 248It contains a pointer to the interface 249the packet has been received from 250.Pq Vt struct ifnet Va *rcvif , 251and the total packet length 252.Pq Vt int Va len . 253Optionally, it may also contain an attached list of packet tags 254.Pq Vt "struct m_tag" . 255See 256.Xr mbuf_tags 9 257for details. 
258Fields used in offloading checksum calculation to the hardware are kept in 259.Va m_pkthdr 260as well. 261See 262.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 263for details. 264.Pp 265If small enough, data is stored in the internal data buffer of an 266.Vt mbuf . 267If the data is sufficiently large, another 268.Vt mbuf 269may be added to the 270.Vt mbuf chain , 271or external storage may be associated with the 272.Vt mbuf . 273.Dv MHLEN 274bytes of data can fit into an 275.Vt mbuf 276with the 277.Dv M_PKTHDR 278flag set, 279.Dv MLEN 280bytes can otherwise. 281.Pp 282If external storage is being associated with an 283.Vt mbuf , 284the 285.Va m_ext 286header is added at the cost of losing the internal data buffer. 287It includes a pointer to external storage, the size of the storage, 288a pointer to a function used for freeing the storage, 289a pointer to an optional argument that can be passed to the function, 290and a pointer to a reference counter. 291An 292.Vt mbuf 293using external storage has the 294.Dv M_EXT 295flag set. 296.Pp 297The system supplies a macro for allocating the desired external storage 298buffer, 299.Dv MEXTADD . 300.Pp 301The allocation and management of the reference counter is handled by the 302subsystem. 303.Pp 304The system also supplies a default type of external storage buffer called an 305.Vt mbuf cluster . 306.Vt Mbuf clusters 307can be allocated and configured with the use of the 308.Dv MCLGET 309macro. 310Each 311.Vt mbuf cluster 312is 313.Dv MCLBYTES 314in size, where MCLBYTES is a machine-dependent constant. 315The system defines an advisory macro 316.Dv MINCLSIZE , 317which is the smallest amount of data to put into an 318.Vt mbuf cluster . 319It is equal to the sum of 320.Dv MLEN 321and 322.Dv MHLEN . 323It is typically preferable to store data into the data region of an 324.Vt mbuf , 325if size permits, as opposed to allocating a separate 326.Vt mbuf cluster 327to hold the same data. 
328.\" 329.Ss Macros and Functions 330There are numerous predefined macros and functions that provide the 331developer with common utilities. 332.\" 333.Bl -ohang -offset indent 334.It Fn mtod mbuf type 335Convert an 336.Fa mbuf 337pointer to a data pointer. 338The macro expands to the data pointer cast to the pointer of the specified 339.Fa type . 340.Sy Note : 341It is advisable to ensure that there is enough contiguous data in 342.Fa mbuf . 343See 344.Fn m_pullup 345for details. 346.It Fn MGET mbuf how type 347Allocate an 348.Vt mbuf 349and initialize it to contain internal data. 350.Fa mbuf 351will point to the allocated 352.Vt mbuf 353on success, or be set to 354.Dv NULL 355on failure. 356The 357.Fa how 358argument is to be set to 359.Dv M_TRYWAIT 360or 361.Dv M_DONTWAIT . 362It specifies whether the caller is willing to block if necessary. 363If 364.Fa how 365is set to 366.Dv M_TRYWAIT , 367a failed allocation will result in the caller being put 368to sleep for a designated 369kern.ipc.mbuf_wait 370.Xr ( sysctl 8 371tunable) 372number of ticks. 373A number of other functions and macros related to 374.Vt mbufs 375have the same argument because they may 376at some point need to allocate new 377.Vt mbufs . 378.Pp 379Programmers should be careful not to confuse the 380.Vt mbuf 381allocation flag 382.Dv M_DONTWAIT 383with the 384.Xr malloc 9 385allocation flag, 386.Dv M_NOWAIT . 387They are not the same. 388.It Fn MGETHDR mbuf how type 389Allocate an 390.Vt mbuf 391and initialize it to contain a packet header 392and internal data. 393See 394.Fn MGET 395for details. 396.It Fn MCLGET mbuf how 397Allocate and attach an 398.Vt mbuf cluster 399to 400.Fa mbuf . 401If the macro fails, the 402.Dv M_EXT 403flag will not be set in 404.Fa mbuf . 405.It Fn M_ALIGN mbuf len 406Set the pointer 407.Fa mbuf->m_data 408to place an object of the size 409.Fa len 410at the end of the internal data area of 411.Fa mbuf , 412long word aligned. 
413Applicable only if 414.Fa mbuf 415is newly allocated with 416.Fn MGET 417or 418.Fn m_get . 419.It Fn MH_ALIGN mbuf len 420Serves the same purpose as 421.Fn M_ALIGN 422does, but only for 423.Fa mbuf 424newly allocated with 425.Fn MGETHDR 426or 427.Fn m_gethdr , 428or initialized by 429.Fn m_dup_pkthdr 430or 431.Fn m_move_pkthdr . 432.It Fn m_align mbuf len 433Serves the same purpose as 434.Fn M_ALIGN 435but handles any type of mbuf. 436.It Fn M_LEADINGSPACE mbuf 437Returns the number of bytes available before the beginning 438of data in 439.Fa mbuf . 440.It Fn M_TRAILINGSPACE mbuf 441Returns the number of bytes available after the end of data in 442.Fa mbuf . 443.It Fn M_PREPEND mbuf len how 444This macro operates on an 445.Vt mbuf chain . 446It is an optimized wrapper for 447.Fn m_prepend 448that can make use of possible empty space before data 449(e.g.\& left after trimming of a link-layer header). 450The new 451.Vt mbuf chain 452pointer or 453.Dv NULL 454is in 455.Fa mbuf 456after the call. 457.It Fn M_MOVE_PKTHDR to from 458Using this macro is equivalent to calling 459.Fn m_move_pkthdr to from . 460.It Fn M_WRITABLE mbuf 461This macro will evaluate true if 462.Fa mbuf 463is not marked 464.Dv M_RDONLY 465and if either 466.Fa mbuf 467does not contain external storage or, 468if it does, 469then if the reference count of the storage is not greater than 1. 470The 471.Dv M_RDONLY 472flag can be set in 473.Fa mbuf->m_flags . 474This can be achieved during setup of the external storage, 475by passing the 476.Dv M_RDONLY 477bit as a 478.Fa flags 479argument to the 480.Fn MEXTADD 481macro, or can be directly set in individual 482.Vt mbufs . 483.It Fn MCHTYPE mbuf type 484Change the type of 485.Fa mbuf 486to 487.Fa type . 488This is a relatively expensive operation and should be avoided. 489.El 490.Pp 491The functions are: 492.Bl -ohang -offset indent 493.It Fn m_get how type 494A function version of 495.Fn MGET 496for non-critical paths. 
497.It Fn m_getm orig len how type 498Allocate 499.Fa len 500bytes worth of 501.Vt mbufs 502and 503.Vt mbuf clusters 504if necessary and append the resulting allocated 505.Vt mbuf chain 506to the 507.Vt mbuf chain 508.Fa orig , 509if it is 510.No non- Ns Dv NULL . 511If the allocation fails at any point, 512free whatever was allocated and return 513.Dv NULL . 514If 515.Fa orig 516is 517.No non- Ns Dv NULL , 518it will not be freed. 519It is possible to use 520.Fn m_getm 521to either append 522.Fa len 523bytes to an existing 524.Vt mbuf 525or 526.Vt mbuf chain 527(for example, one which may be sitting in a pre-allocated ring) 528or to simply perform an all-or-nothing 529.Vt mbuf 530and 531.Vt mbuf cluster 532allocation. 533.It Fn m_gethdr how type 534A function version of 535.Fn MGETHDR 536for non-critical paths. 537.It Fn m_getcl how type flags 538Fetch an 539.Vt mbuf 540with a 541.Vt mbuf cluster 542attached to it. 543If one of the allocations fails, the entire allocation fails. 544This routine is the preferred way of fetching both the 545.Vt mbuf 546and 547.Vt mbuf cluster 548together, as it avoids having to unlock/relock between allocations. 549Returns 550.Dv NULL 551on failure. 552.It Fn m_getclr how type 553Allocate an 554.Vt mbuf 555and zero out the data region. 556.It Fn m_free mbuf 557Frees 558.Vt mbuf . 559Returns 560.Va m_next 561of the freed 562.Vt mbuf . 563.El 564.Pp 565The functions below operate on 566.Vt mbuf chains . 567.Bl -ohang -offset indent 568.It Fn m_freem mbuf 569Free an entire 570.Vt mbuf chain , 571including any external storage. 572.\" 573.It Fn m_adj mbuf len 574Trim 575.Fa len 576bytes from the head of an 577.Vt mbuf chain 578if 579.Fa len 580is positive, from the tail otherwise. 581.\" 582.It Fn m_append mbuf len cp 583Append 584.Fa len 585bytes of data 586.Fa cp 587to the 588.Vt mbuf chain . 589Extend the mbuf chain if the new data does not fit in 590existing space. 
591.\" 592.It Fn m_prepend mbuf len how 593Allocate a new 594.Vt mbuf 595and prepend it to the 596.Vt mbuf chain , 597handle 598.Dv M_PKTHDR 599properly. 600.Sy Note : 601It does not allocate any 602.Vt mbuf clusters , 603so 604.Fa len 605must be less than 606.Dv MLEN 607or 608.Dv MHLEN , 609depending on the 610.Dv M_PKTHDR 611flag setting. 612.\" 613.It Fn m_copyup mbuf len dstoff 614Similar to 615.Fn m_pullup 616but copies 617.Fa len 618bytes of data into a new mbuf at 619.Fa dstoff 620bytes into the mbuf. 621The 622.Fa dstoff 623argument aligns the data and leaves room for a link layer header. 624Returns the new 625.Vt mbuf chain 626on success, 627and frees the 628.Vt mbuf chain 629and returns 630.Dv NULL 631on failure. 632.Sy Note : 633The function does not allocate 634.Vt mbuf clusters , 635so 636.Fa len + dstoff 637must be less than 638.Dv MHLEN . 639.\" 640.It Fn m_pullup mbuf len 641Arrange that the first 642.Fa len 643bytes of an 644.Vt mbuf chain 645are contiguous and lie in the data area of 646.Fa mbuf , 647so they are accessible with 648.Fn mtod mbuf type . 649It is important to remember that this may involve 650reallocating some mbufs and moving data so all pointers 651referencing data within the old mbuf chain 652must be recalculated or made invalid. 653Return the new 654.Vt mbuf chain 655on success, 656.Dv NULL 657on failure 658(the 659.Vt mbuf chain 660is freed in this case). 661.Sy Note : 662It does not allocate any 663.Vt mbuf clusters , 664so 665.Fa len 666must be less than 667.Dv MHLEN . 668.\" 669.It Fn m_pulldown mbuf offset len offsetp 670Arrange that 671.Fa len 672bytes between 673.Fa offset 674and 675.Fa offset + len 676in the 677.Vt mbuf chain 678are contiguous and lie in the data area of 679.Fa mbuf , 680so they are accessible with 681.Fn mtod mbuf type . 682.Fa len No must be smaller than, or equal to, the size of an 683.Vt mbuf cluster . 
684Return a pointer to an intermediate 685.Vt mbuf 686in the chain containing the requested region; 687the offset in the data region of the 688.Vt mbuf chain 689to the data contained in the returned mbuf is stored in 690.Fa *offsetp . 691If 692.Fa offsetp 693is NULL, the region may be accessed using 694.Fn mtod mbuf type . 695If 696.Fa offsetp 697is non-NULL, the region may be accessed using 698.Fn mtod mbuf uint8_t + *offsetp . 699The region of the mbuf chain between its beginning and 700.Fa offset 701is not modified, therefore it is safe to hold pointers to data within 702this region before calling 703.Fn m_pulldown . 704.\" 705.It Fn m_copym mbuf offset len how 706Make a copy of an 707.Vt mbuf chain 708starting 709.Fa offset 710bytes from the beginning, continuing for 711.Fa len 712bytes. 713If 714.Fa len 715is 716.Dv M_COPYALL , 717copy to the end of the 718.Vt mbuf chain . 719.Sy Note : 720The copy is read-only, because the 721.Vt mbuf clusters 722are not copied, only their reference counts are incremented. 723.\" 724.It Fn m_copypacket mbuf how 725Copy an entire packet including header, which must be present. 726This is an optimized version of the common case 727.Fn m_copym mbuf 0 M_COPYALL how . 728.Sy Note : 729The copy is read-only, because the 730.Vt mbuf clusters 731are not copied, only their reference counts are incremented. 732.\" 733.It Fn m_dup mbuf how 734Copy a packet header 735.Vt mbuf chain 736into a completely new 737.Vt mbuf chain , 738including copying any 739.Vt mbuf clusters . 740Use this instead of 741.Fn m_copypacket 742when you need a writable copy of an 743.Vt mbuf chain . 744.\" 745.It Fn m_copydata mbuf offset len buf 746Copy data from an 747.Vt mbuf chain 748starting 749.Fa offset 750bytes from the beginning, continuing for 751.Fa len 752bytes, into the indicated buffer 753.Fa buf . 
754.\" 755.It Fn m_copyback mbuf offset len buf 756Copy 757.Fa len 758bytes from the buffer 759.Fa buf 760back into the indicated 761.Vt mbuf chain , 762starting at 763.Fa offset 764bytes from the beginning of the 765.Vt mbuf chain , 766extending the 767.Vt mbuf chain 768if necessary. 769.Sy Note : 770It does not allocate any 771.Vt mbuf clusters , 772just adds 773.Vt mbufs 774to the 775.Vt mbuf chain . 776It is safe to set 777.Fa offset 778beyond the current 779.Vt mbuf chain 780end: zeroed 781.Vt mbufs 782will be allocated to fill the space. 783.\" 784.It Fn m_length mbuf last 785Return the length of the 786.Vt mbuf chain , 787and optionally a pointer to the last 788.Vt mbuf . 789.\" 790.It Fn m_dup_pkthdr to from how 791Upon the function's completion, the 792.Vt mbuf 793.Fa to 794will contain an identical copy of 795.Fa from->m_pkthdr 796and the per-packet attributes found in the 797.Vt mbuf chain 798.Fa from . 799The 800.Vt mbuf 801.Fa from 802must have the flag 803.Dv M_PKTHDR 804initially set, and 805.Fa to 806must be empty on entry. 807.\" 808.It Fn m_move_pkthdr to from 809Move 810.Va m_pkthdr 811and the per-packet attributes from the 812.Vt mbuf chain 813.Fa from 814to the 815.Vt mbuf 816.Fa to . 817The 818.Vt mbuf 819.Fa from 820must have the flag 821.Dv M_PKTHDR 822initially set, and 823.Fa to 824must be empty on entry. 825Upon the function's completion, 826.Fa from 827will have the flag 828.Dv M_PKTHDR 829and the per-packet attributes cleared. 830.\" 831.It Fn m_fixhdr mbuf 832Set the packet-header length to the length of the 833.Vt mbuf chain . 834.\" 835.It Fn m_devget buf len offset ifp copy 836Copy data from a device local memory pointed to by 837.Fa buf 838to an 839.Vt mbuf chain . 840The copy is done using a specified copy routine 841.Fa copy , 842or 843.Fn bcopy 844if 845.Fa copy 846is 847.Dv NULL . 848.\" 849.It Fn m_cat m n 850Concatenate 851.Fa n 852to 853.Fa m . 854Both 855.Vt mbuf chains 856must be of the same type. 
857.Fa n 858is still valid after the function has returned. 859.Sy Note : 860It does not handle 861.Dv M_PKTHDR 862and friends. 863.\" 864.It Fn m_split mbuf len how 865Partition an 866.Vt mbuf chain 867in two pieces, returning the tail: 868all but the first 869.Fa len 870bytes. 871In case of failure, it returns 872.Dv NULL 873and attempts to restore the 874.Vt mbuf chain 875to its original state. 876.\" 877.It Fn m_apply mbuf off len f arg 878Apply a function to an 879.Vt mbuf chain , 880at offset 881.Fa off , 882for length 883.Fa len 884bytes. 885Typically used to avoid calls to 886.Fn m_pullup 887which would otherwise be unnecessary or undesirable. 888.Fa arg 889is a convenience argument which is passed to the callback function 890.Fa f . 891.Pp 892Each time 893.Fn f 894is called, it will be passed 895.Fa arg , 896a pointer to the 897.Fa data 898in the current mbuf, and the length 899.Fa len 900of the data in this mbuf to which the function should be applied. 901.Pp 902The function should return zero to indicate success; 903otherwise, if an error is indicated, then 904.Fn m_apply 905will return the error and stop iterating through the 906.Vt mbuf chain . 907.\" 908.It Fn m_getptr mbuf loc off 909Return a pointer to the mbuf containing the data located at 910.Fa loc 911bytes from the beginning of the 912.Vt mbuf chain . 913The corresponding offset into the mbuf will be stored in 914.Fa *off . 915.It Fn m_defrag m0 how 916Defragment an mbuf chain, returning the shortest possible 917chain of mbufs and clusters. 918If allocation fails and this cannot be completed, 919.Dv NULL 920will be returned and the original chain will be unchanged. 921Upon success, the original chain will be freed and the new 922chain will be returned. 923.Fa how 924should be either 925.Dv M_TRYWAIT 926or 927.Dv M_DONTWAIT , 928depending on the caller's preference. 
929.Pp 930This function is especially useful in network drivers, where 931certain long mbuf chains must be shortened before being added 932to TX descriptor lists. 933.It Fn m_unshare m0 how 934Create a version of the specified mbuf chain whose 935contents can be safely modified without affecting other users. 936If allocation fails and this operation can not be completed, 937.Dv NULL 938will be returned. 939The original mbuf chain is always reclaimed and the reference 940count of any shared mbuf clusters is decremented. 941.Fa how 942should be either 943.Dv M_TRYWAIT 944or 945.Dv M_DONTWAIT , 946depending on the caller's preference. 947As a side-effect of this process the returned 948mbuf chain may be compacted. 949.Pp 950This function is especially useful in the transmit path of 951network code, when data must be encrypted or otherwise 952altered prior to transmission. 953.El 954.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 955This section currently applies to TCP/IP only. 956In order to save the host CPU resources, computing checksums is 957offloaded to the network interface hardware if possible. 958The 959.Va m_pkthdr 960member of the leading 961.Vt mbuf 962of a packet contains two fields used for that purpose, 963.Vt int Va csum_flags 964and 965.Vt int Va csum_data . 966The meaning of those fields depends on the direction a packet flows in, 967and on whether the packet is fragmented. 968Henceforth, 969.Va csum_flags 970or 971.Va csum_data 972of a packet 973will denote the corresponding field of the 974.Va m_pkthdr 975member of the leading 976.Vt mbuf 977in the 978.Vt mbuf chain 979containing the packet. 980.Pp 981On output, checksum offloading is attempted after the outgoing 982interface has been determined for a packet. 983The interface-specific field 984.Va ifnet.if_data.ifi_hwassist 985(see 986.Xr ifnet 9 ) 987is consulted for the capabilities of the interface to assist in 988computing checksums. 
989The 990.Va csum_flags 991field of the packet header is set to indicate which actions the interface 992is supposed to perform on it. 993The actions unsupported by the network interface are done in the 994software prior to passing the packet down to the interface driver; 995such actions will never be requested through 996.Va csum_flags . 997.Pp 998The flags demanding a particular action from an interface are as follows: 999.Bl -tag -width ".Dv CSUM_TCP" -offset indent 1000.It Dv CSUM_IP 1001The IP header checksum is to be computed and stored in the 1002corresponding field of the packet. 1003The hardware is expected to know the format of an IP header 1004to determine the offset of the IP checksum field. 1005.It Dv CSUM_TCP 1006The TCP checksum is to be computed. 1007(See below.) 1008.It Dv CSUM_UDP 1009The UDP checksum is to be computed. 1010(See below.) 1011.El 1012.Pp 1013Should a TCP or UDP checksum be offloaded to the hardware, 1014the field 1015.Va csum_data 1016will contain the byte offset of the checksum field relative to the 1017end of the IP header. 1018In this case, the checksum field will be initially 1019set by the TCP/IP module to the checksum of the pseudo header 1020defined by the TCP and UDP specifications. 1021.Pp 1022For outbound packets which have been fragmented 1023by the host CPU, the following will also be true, 1024regardless of the checksum flag settings: 1025.Bl -bullet -offset indent 1026.It 1027all fragments will have the flag 1028.Dv M_FRAG 1029set in their 1030.Va m_flags 1031field; 1032.It 1033the first and the last fragments in the chain will have 1034.Dv M_FIRSTFRAG 1035or 1036.Dv M_LASTFRAG 1037set in their 1038.Va m_flags , 1039correspondingly; 1040.It 1041the first fragment in the chain will have the total number 1042of fragments contained in its 1043.Va csum_data 1044field. 1045.El 1046.Pp 1047The last rule for fragmented packets takes precedence over the one 1048for a TCP or UDP checksum. 
1049Nevertheless, offloading a TCP or UDP checksum is possible for a 1050fragmented packet if the flag 1051.Dv CSUM_IP_FRAGS 1052is set in the field 1053.Va ifnet.if_data.ifi_hwassist 1054associated with the network interface. 1055However, in this case the interface is expected to figure out 1056the location of the checksum field within the sequence of fragments 1057by itself because 1058.Va csum_data 1059contains a fragment count instead of a checksum offset value. 1060.Pp 1061On input, an interface indicates the actions it has performed 1062on a packet by setting one or more of the following flags in 1063.Va csum_flags 1064associated with the packet: 1065.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1066.It Dv CSUM_IP_CHECKED 1067The IP header checksum has been computed. 1068.It Dv CSUM_IP_VALID 1069The IP header has a valid checksum. 1070This flag can appear only in combination with 1071.Dv CSUM_IP_CHECKED . 1072.It Dv CSUM_DATA_VALID 1073The checksum of the data portion of the IP packet has been computed 1074and stored in the field 1075.Va csum_data 1076in network byte order. 1077.It Dv CSUM_PSEUDO_HDR 1078Can be set only along with 1079.Dv CSUM_DATA_VALID 1080to indicate that the IP data checksum found in 1081.Va csum_data 1082allows for the pseudo header defined by the TCP and UDP specifications. 1083Otherwise the checksum of the pseudo header must be calculated by 1084the host CPU and added to 1085.Va csum_data 1086to obtain the final checksum to be used for TCP or UDP validation purposes. 1087.El 1088.Pp 1089If a particular network interface just indicates success or 1090failure of TCP or UDP checksum validation without returning 1091the exact value of the checksum to the host CPU, its driver can mark 1092.Dv CSUM_DATA_VALID 1093and 1094.Dv CSUM_PSEUDO_HDR 1095in 1096.Va csum_flags , 1097and set 1098.Va csum_data 1099to 1100.Li 0xFFFF 1101hexadecimal to indicate a valid checksum. 
1102It is a peculiarity of the algorithm used that the Internet checksum 1103calculated over any valid packet will be 1104.Li 0xFFFF 1105as long as the original checksum field is included. 1106.Pp 1107For inbound packets which are IP fragments, all 1108.Va csum_data 1109fields will be summed during reassembly to obtain the final checksum 1110value passed to an upper layer in the 1111.Va csum_data 1112field of the reassembled packet. 1113The 1114.Va csum_flags 1115fields of all fragments will be consolidated using logical AND 1116to obtain the final value for 1117.Va csum_flags . 1118Thus, in order to successfully 1119offload checksum computation for fragmented data, 1120all fragments should have the same value of 1121.Va csum_flags . 1122.Sh STRESS TESTING 1123When running a kernel compiled with the option 1124.Dv MBUF_STRESS_TEST , 1125the following 1126.Xr sysctl 8 Ns 1127-controlled options may be used to create 1128various failure/extreme cases for testing of network drivers 1129and other parts of the kernel that rely on 1130.Vt mbufs . 1131.Bl -tag -width ident 1132.It Va net.inet.ip.mbuf_frag_size 1133Causes 1134.Fn ip_output 1135to fragment outgoing 1136.Vt mbuf chains 1137into fragments of the specified size. 1138Setting this variable to 1 is an excellent way to 1139test the long 1140.Vt mbuf chain 1141handling ability of network drivers. 1142.It Va kern.ipc.m_defragrandomfailures 1143Causes the function 1144.Fn m_defrag 1145to randomly fail, returning 1146.Dv NULL . 1147Any piece of code which uses 1148.Fn m_defrag 1149should be tested with this feature. 1150.El 1151.Sh RETURN VALUES 1152See above. 1153.Sh SEE ALSO 1154.Xr ifnet 9 , 1155.Xr mbuf_tags 9 1156.Sh HISTORY 1157.\" Please correct me if I'm wrong 1158.Vt Mbufs 1159appeared in an early version of 1160.Bx . 1161Besides being used for network packets, they were used 1162to store various dynamic structures, such as routing table 1163entries, interface addresses, protocol control blocks, etc. 
1164In more recent 1165.Fx 1166use of 1167.Vt mbufs 1168is almost entirely limited to packet storage, with 1169.Xr uma 9 1170zones being used directly to store other network-related memory. 1171.Pp 1172Historically, the 1173.Vt mbuf 1174allocator has been a special-purpose memory allocator able to run in 1175interrupt contexts and allocating from a special kernel address space map. 1176As of 1177.Fx 5.3 , 1178the 1179.Vt mbuf 1180allocator is a wrapper around 1181.Xr uma 9 , 1182allowing caching of 1183.Vt mbufs , 1184clusters, and 1185.Vt mbuf 1186+ cluster pairs in per-CPU caches, as well as bringing other benefits of 1187slab allocation. 1188.Sh AUTHORS 1189The original 1190.Nm 1191manual page was written by Yar Tikhiy. 1192The 1193.Xr uma 9 1194.Vt mbuf 1195allocator was written by Bosko Milekic. 1196