1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd February 26, 2007 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 55.\" 56.Ss Mbuf utility macros 57.Fn mtod "struct mbuf *mbuf" "type" 58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 60.Ft int 61.Fn M_LEADINGSPACE "struct mbuf *mbuf" 62.Ft int 63.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 67.Ft int 68.Fn M_WRITABLE "struct mbuf *mbuf" 69.\" 70.Ss Mbuf allocation functions 71.Ft struct mbuf * 72.Fn m_get "int how" "int type" 73.Ft struct mbuf * 74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 75.Ft struct mbuf * 76.Fn m_getcl "int how" "short type" "int flags" 77.Ft struct mbuf * 78.Fn m_getclr "int how" "int type" 79.Ft struct mbuf * 80.Fn m_gethdr "int how" "int type" 81.Ft struct mbuf * 82.Fn m_free "struct mbuf *mbuf" 83.Ft void 84.Fn m_freem "struct mbuf *mbuf" 85.\" 86.Ss Mbuf utility functions 87.Ft void 88.Fn m_adj "struct mbuf *mbuf" "int len" 89.Ft void 90.Fn m_align "struct mbuf *mbuf" "int len" 91.Ft int 92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 97.Ft struct 
mbuf * 98.Fn m_pullup "struct mbuf *mbuf" "int len" 99.Ft struct mbuf * 100.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp" 101.Ft struct mbuf * 102.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 103.Ft struct mbuf * 104.Fn m_copypacket "struct mbuf *mbuf" "int how" 105.Ft struct mbuf * 106.Fn m_dup "struct mbuf *mbuf" "int how" 107.Ft void 108.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 109.Ft void 110.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 111.Ft struct mbuf * 112.Fo m_devget 113.Fa "char *buf" 114.Fa "int len" 115.Fa "int offset" 116.Fa "struct ifnet *ifp" 117.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 118.Fc 119.Ft void 120.Fn m_cat "struct mbuf *m" "struct mbuf *n" 121.Ft u_int 122.Fn m_fixhdr "struct mbuf *mbuf" 123.Ft int 124.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 125.Ft void 126.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 127.Ft u_int 128.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 129.Ft struct mbuf * 130.Fn m_split "struct mbuf *mbuf" "int len" "int how" 131.Ft int 132.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 133.Ft struct mbuf * 134.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 135.Ft struct mbuf * 136.Fn m_defrag "struct mbuf *m0" "int how" 137.Ft struct mbuf * 138.Fn m_unshare "struct mbuf *m0" "int how" 139.\" 140.Sh DESCRIPTION 141An 142.Vt mbuf 143is a basic unit of memory management in the kernel IPC subsystem. 144Network packets and socket buffers are stored in 145.Vt mbufs . 146A network packet may span multiple 147.Vt mbufs 148arranged into an 149.Vt mbuf chain 150(linked list), 151which allows adding or trimming 152network headers with little overhead. 
153.Pp 154While a developer should not bother with 155.Vt mbuf 156internals without serious 157reason in order to avoid incompatibilities with future changes, it 158is useful to understand the general structure of an 159.Vt mbuf . 160.Pp 161An 162.Vt mbuf 163consists of a variable-sized header and a small internal 164buffer for data. 165The total size of an 166.Vt mbuf , 167.Dv MSIZE , 168is a constant defined in 169.In sys/param.h . 170The 171.Vt mbuf 172header includes: 173.Pp 174.Bl -tag -width "m_nextpkt" -offset indent 175.It Va m_next 176.Pq Vt struct mbuf * 177A pointer to the next 178.Vt mbuf 179in the 180.Vt mbuf chain . 181.It Va m_nextpkt 182.Pq Vt struct mbuf * 183A pointer to the next 184.Vt mbuf chain 185in the queue. 186.It Va m_data 187.Pq Vt caddr_t 188A pointer to data attached to this 189.Vt mbuf . 190.It Va m_len 191.Pq Vt int 192The length of the data. 193.It Va m_type 194.Pq Vt short 195The type of the data. 196.It Va m_flags 197.Pq Vt int 198The 199.Vt mbuf 200flags. 
201.El 202.Pp 203The 204.Vt mbuf 205flag bits are defined as follows: 206.Bd -literal 207/* mbuf flags */ 208#define M_EXT 0x0001 /* has associated external storage */ 209#define M_PKTHDR 0x0002 /* start of record */ 210#define M_EOR 0x0004 /* end of record */ 211#define M_RDONLY 0x0008 /* associated data marked read-only */ 212#define M_PROTO1 0x0010 /* protocol-specific */ 213#define M_PROTO2 0x0020 /* protocol-specific */ 214#define M_PROTO3 0x0040 /* protocol-specific */ 215#define M_PROTO4 0x0080 /* protocol-specific */ 216#define M_PROTO5 0x0100 /* protocol-specific */ 217#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 218#define M_FREELIST 0x8000 /* mbuf is on the free list */ 219 220/* mbuf pkthdr flags (also stored in m_flags) */ 221#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 222#define M_MCAST 0x0400 /* send/received as link-level multicast */ 223#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 224#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 225#define M_LASTFRAG 0x2000 /* packet is last fragment */ 226.Ed 227.Pp 228The available 229.Vt mbuf 230types are defined as follows: 231.Bd -literal 232/* mbuf types */ 233#define MT_DATA 1 /* dynamic (data) allocation */ 234#define MT_HEADER MT_DATA /* packet header */ 235#define MT_SONAME 8 /* socket name */ 236#define MT_CONTROL 14 /* extra-data protocol message */ 237#define MT_OOBDATA 15 /* expedited data */ 238.Ed 239.Pp 240If the 241.Dv M_PKTHDR 242flag is set, a 243.Vt struct pkthdr Va m_pkthdr 244is added to the 245.Vt mbuf 246header. 247It contains a pointer to the interface 248the packet has been received from 249.Pq Vt struct ifnet Va *rcvif , 250and the total packet length 251.Pq Vt int Va len . 252Optionally, it may also contain an attached list of packet tags 253.Pq Vt "struct m_tag" . 254See 255.Xr mbuf_tags 9 256for details. 
257Fields used in offloading checksum calculation to the hardware are kept in 258.Va m_pkthdr 259as well. 260See 261.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 262for details. 263.Pp 264If small enough, data is stored in the internal data buffer of an 265.Vt mbuf . 266If the data is sufficiently large, another 267.Vt mbuf 268may be added to the 269.Vt mbuf chain , 270or external storage may be associated with the 271.Vt mbuf . 272.Dv MHLEN 273bytes of data can fit into an 274.Vt mbuf 275with the 276.Dv M_PKTHDR 277flag set, 278.Dv MLEN 279bytes can otherwise. 280.Pp 281If external storage is being associated with an 282.Vt mbuf , 283the 284.Va m_ext 285header is added at the cost of losing the internal data buffer. 286It includes a pointer to external storage, the size of the storage, 287a pointer to a function used for freeing the storage, 288a pointer to an optional argument that can be passed to the function, 289and a pointer to a reference counter. 290An 291.Vt mbuf 292using external storage has the 293.Dv M_EXT 294flag set. 295.Pp 296The system supplies a macro for allocating the desired external storage 297buffer, 298.Dv MEXTADD . 299.Pp 300The allocation and management of the reference counter is handled by the 301subsystem. 302.Pp 303The system also supplies a default type of external storage buffer called an 304.Vt mbuf cluster . 305.Vt Mbuf clusters 306can be allocated and configured with the use of the 307.Dv MCLGET 308macro. 309Each 310.Vt mbuf cluster 311is 312.Dv MCLBYTES 313in size, where MCLBYTES is a machine-dependent constant. 314The system defines an advisory macro 315.Dv MINCLSIZE , 316which is the smallest amount of data to put into an 317.Vt mbuf cluster . 318It is equal to the sum of 319.Dv MLEN 320and 321.Dv MHLEN . 322It is typically preferable to store data into the data region of an 323.Vt mbuf , 324if size permits, as opposed to allocating a separate 325.Vt mbuf cluster 326to hold the same data. 
327.\" 328.Ss Macros and Functions 329There are numerous predefined macros and functions that provide the 330developer with common utilities. 331.\" 332.Bl -ohang -offset indent 333.It Fn mtod mbuf type 334Convert an 335.Fa mbuf 336pointer to a data pointer. 337The macro expands to the data pointer cast to the pointer of the specified 338.Fa type . 339.Sy Note : 340It is advisable to ensure that there is enough contiguous data in 341.Fa mbuf . 342See 343.Fn m_pullup 344for details. 345.It Fn MGET mbuf how type 346Allocate an 347.Vt mbuf 348and initialize it to contain internal data. 349.Fa mbuf 350will point to the allocated 351.Vt mbuf 352on success, or be set to 353.Dv NULL 354on failure. 355The 356.Fa how 357argument is to be set to 358.Dv M_TRYWAIT 359or 360.Dv M_DONTWAIT . 361It specifies whether the caller is willing to block if necessary. 362If 363.Fa how 364is set to 365.Dv M_TRYWAIT , 366a failed allocation will result in the caller being put 367to sleep for a designated 368kern.ipc.mbuf_wait 369.Xr ( sysctl 8 370tunable) 371number of ticks. 372A number of other functions and macros related to 373.Vt mbufs 374have the same argument because they may 375at some point need to allocate new 376.Vt mbufs . 377.Pp 378Programmers should be careful not to confuse the 379.Vt mbuf 380allocation flag 381.Dv M_DONTWAIT 382with the 383.Xr malloc 9 384allocation flag, 385.Dv M_NOWAIT . 386They are not the same. 387.It Fn MGETHDR mbuf how type 388Allocate an 389.Vt mbuf 390and initialize it to contain a packet header 391and internal data. 392See 393.Fn MGET 394for details. 395.It Fn MCLGET mbuf how 396Allocate and attach an 397.Vt mbuf cluster 398to 399.Fa mbuf . 400If the macro fails, the 401.Dv M_EXT 402flag will not be set in 403.Fa mbuf . 404.It Fn M_ALIGN mbuf len 405Set the pointer 406.Fa mbuf->m_data 407to place an object of the size 408.Fa len 409at the end of the internal data area of 410.Fa mbuf , 411long word aligned. 
412Applicable only if 413.Fa mbuf 414is newly allocated with 415.Fn MGET 416or 417.Fn m_get . 418.It Fn MH_ALIGN mbuf len 419Serves the same purpose as 420.Fn M_ALIGN 421does, but only for 422.Fa mbuf 423newly allocated with 424.Fn MGETHDR 425or 426.Fn m_gethdr , 427or initialized by 428.Fn m_dup_pkthdr 429or 430.Fn m_move_pkthdr . 431.It Fn m_align mbuf len 432Serves the same purpose as 433.Fn M_ALIGN 434but handles any type of mbuf. 435.It Fn M_LEADINGSPACE mbuf 436Returns the number of bytes available before the beginning 437of data in 438.Fa mbuf . 439.It Fn M_TRAILINGSPACE mbuf 440Returns the number of bytes available after the end of data in 441.Fa mbuf . 442.It Fn M_PREPEND mbuf len how 443This macro operates on an 444.Vt mbuf chain . 445It is an optimized wrapper for 446.Fn m_prepend 447that can make use of possible empty space before data 448(e.g.\& left after trimming of a link-layer header). 449The new 450.Vt mbuf chain 451pointer or 452.Dv NULL 453is in 454.Fa mbuf 455after the call. 456.It Fn M_MOVE_PKTHDR to from 457Using this macro is equivalent to calling 458.Fn m_move_pkthdr to from . 459.It Fn M_WRITABLE mbuf 460This macro will evaluate true if 461.Fa mbuf 462is not marked 463.Dv M_RDONLY 464and if either 465.Fa mbuf 466does not contain external storage or, 467if it does, 468the reference count of the storage is not greater than 1. 469The 470.Dv M_RDONLY 471flag can be set in 472.Fa mbuf->m_flags . 473This can be achieved during setup of the external storage, 474by passing the 475.Dv M_RDONLY 476bit as a 477.Fa flags 478argument to the 479.Fn MEXTADD 480macro, or can be directly set in individual 481.Vt mbufs . 482.It Fn MCHTYPE mbuf type 483Change the type of 484.Fa mbuf 485to 486.Fa type . 487This is a relatively expensive operation and should be avoided. 488.El 489.Pp 490The functions are: 491.Bl -ohang -offset indent 492.It Fn m_get how type 493A function version of 494.Fn MGET 495for non-critical paths. 
496.It Fn m_getm orig len how type 497Allocate 498.Fa len 499bytes worth of 500.Vt mbufs 501and 502.Vt mbuf clusters 503if necessary and append the resulting allocated 504.Vt mbuf chain 505to the 506.Vt mbuf chain 507.Fa orig , 508if it is 509.No non- Ns Dv NULL . 510If the allocation fails at any point, 511free whatever was allocated and return 512.Dv NULL . 513If 514.Fa orig 515is 516.No non- Ns Dv NULL , 517it will not be freed. 518It is possible to use 519.Fn m_getm 520to either append 521.Fa len 522bytes to an existing 523.Vt mbuf 524or 525.Vt mbuf chain 526(for example, one which may be sitting in a pre-allocated ring) 527or to simply perform an all-or-nothing 528.Vt mbuf 529and 530.Vt mbuf cluster 531allocation. 532.It Fn m_gethdr how type 533A function version of 534.Fn MGETHDR 535for non-critical paths. 536.It Fn m_getcl how type flags 537Fetch an 538.Vt mbuf 539with an 540.Vt mbuf cluster 541attached to it. 542If one of the allocations fails, the entire allocation fails. 543This routine is the preferred way of fetching both the 544.Vt mbuf 545and 546.Vt mbuf cluster 547together, as it avoids having to unlock/relock between allocations. 548Returns 549.Dv NULL 550on failure. 551.It Fn m_getclr how type 552Allocate an 553.Vt mbuf 554and zero out the data region. 555.It Fn m_free mbuf 556Frees 557.Fa mbuf . 558Returns 559.Va m_next 560of the freed 561.Vt mbuf . 562.El 563.Pp 564The functions below operate on 565.Vt mbuf chains . 566.Bl -ohang -offset indent 567.It Fn m_freem mbuf 568Free an entire 569.Vt mbuf chain , 570including any external storage. 571.\" 572.It Fn m_adj mbuf len 573Trim 574.Fa len 575bytes from the head of an 576.Vt mbuf chain 577if 578.Fa len 579is positive, from the tail otherwise. 580.\" 581.It Fn m_append mbuf len cp 582Append 583.Fa len 584bytes of data 585.Fa cp 586to the 587.Vt mbuf chain . 588Extend the mbuf chain if the new data does not fit in 589existing space. 
590.\" 591.It Fn m_prepend mbuf len how 592Allocate a new 593.Vt mbuf 594and prepend it to the 595.Vt mbuf chain , 596handling 597.Dv M_PKTHDR 598properly. 599.Sy Note : 600It does not allocate any 601.Vt mbuf clusters , 602so 603.Fa len 604must be less than 605.Dv MLEN 606or 607.Dv MHLEN , 608depending on the 609.Dv M_PKTHDR 610flag setting. 611.\" 612.It Fn m_copyup mbuf len dstoff 613Similar to 614.Fn m_pullup 615but copies 616.Fa len 617bytes of data into a new mbuf at 618.Fa dstoff 619bytes into the mbuf. 620The 621.Fa dstoff 622argument aligns the data and leaves room for a link layer header. 623Returns the new 624.Vt mbuf chain 625on success, 626and frees the 627.Vt mbuf chain 628and returns 629.Dv NULL 630on failure. 631.Sy Note : 632The function does not allocate 633.Vt mbuf clusters , 634so 635.Fa len + dstoff 636must be less than 637.Dv MHLEN . 638.\" 639.It Fn m_pullup mbuf len 640Arrange that the first 641.Fa len 642bytes of an 643.Vt mbuf chain 644are contiguous and lie in the data area of 645.Fa mbuf , 646so they are accessible with 647.Fn mtod mbuf type . 648It is important to remember that this may involve 649reallocating some mbufs and moving data so all pointers 650referencing data within the old mbuf chain 651must be recalculated or made invalid. 652Return the new 653.Vt mbuf chain 654on success, 655.Dv NULL 656on failure 657(the 658.Vt mbuf chain 659is freed in this case). 660.Sy Note : 661It does not allocate any 662.Vt mbuf clusters , 663so 664.Fa len 665must be less than 666.Dv MHLEN . 667.\" 668.It Fn m_pulldown mbuf offset len offsetp 669Arrange that 670.Fa len 671bytes between 672.Fa offset 673and 674.Fa offset + len 675in the 676.Vt mbuf chain 677are contiguous and lie in the data area of 678.Fa mbuf , 679so they are accessible with 680.Fn mtod mbuf type . 681.Fa len No must be smaller than, or equal to, the size of an 682.Vt mbuf cluster . 
683Return a pointer to an intermediate 684.Vt mbuf 685in the chain containing the requested region; 686the offset in the data region of the 687.Vt mbuf chain 688to the data contained in the returned mbuf is stored in 689.Fa *offsetp . 690If 691.Fa offsetp 692is NULL, the region may be accessed using 693.Fn mtod mbuf type . 694If 695.Fa offsetp 696is non-NULL, the region may be accessed using 697.Fn mtod mbuf uint8_t + *offsetp . 698The region of the mbuf chain between its beginning and 699.Fa offset 700is not modified, therefore it is safe to hold pointers to data within 701this region before calling 702.Fn m_pulldown . 703.\" 704.It Fn m_copym mbuf offset len how 705Make a copy of an 706.Vt mbuf chain 707starting 708.Fa offset 709bytes from the beginning, continuing for 710.Fa len 711bytes. 712If 713.Fa len 714is 715.Dv M_COPYALL , 716copy to the end of the 717.Vt mbuf chain . 718.Sy Note : 719The copy is read-only, because the 720.Vt mbuf clusters 721are not copied, only their reference counts are incremented. 722.\" 723.It Fn m_copypacket mbuf how 724Copy an entire packet including header, which must be present. 725This is an optimized version of the common case 726.Fn m_copym mbuf 0 M_COPYALL how . 727.Sy Note : 728The copy is read-only, because the 729.Vt mbuf clusters 730are not copied, only their reference counts are incremented. 731.\" 732.It Fn m_dup mbuf how 733Copy a packet header 734.Vt mbuf chain 735into a completely new 736.Vt mbuf chain , 737including copying any 738.Vt mbuf clusters . 739Use this instead of 740.Fn m_copypacket 741when you need a writable copy of an 742.Vt mbuf chain . 743.\" 744.It Fn m_copydata mbuf offset len buf 745Copy data from an 746.Vt mbuf chain 747starting 748.Fa offset 749bytes from the beginning, continuing for 750.Fa len 751bytes, into the indicated buffer 752.Fa buf . 
753.\" 754.It Fn m_copyback mbuf offset len buf 755Copy 756.Fa len 757bytes from the buffer 758.Fa buf 759back into the indicated 760.Vt mbuf chain , 761starting at 762.Fa offset 763bytes from the beginning of the 764.Vt mbuf chain , 765extending the 766.Vt mbuf chain 767if necessary. 768.Sy Note : 769It does not allocate any 770.Vt mbuf clusters , 771just adds 772.Vt mbufs 773to the 774.Vt mbuf chain . 775It is safe to set 776.Fa offset 777beyond the current 778.Vt mbuf chain 779end: zeroed 780.Vt mbufs 781will be allocated to fill the space. 782.\" 783.It Fn m_length mbuf last 784Return the length of the 785.Vt mbuf chain , 786and optionally a pointer to the last 787.Vt mbuf . 788.\" 789.It Fn m_dup_pkthdr to from how 790Upon the function's completion, the 791.Vt mbuf 792.Fa to 793will contain an identical copy of 794.Fa from->m_pkthdr 795and the per-packet attributes found in the 796.Vt mbuf chain 797.Fa from . 798The 799.Vt mbuf 800.Fa from 801must have the flag 802.Dv M_PKTHDR 803initially set, and 804.Fa to 805must be empty on entry. 806.\" 807.It Fn m_move_pkthdr to from 808Move 809.Va m_pkthdr 810and the per-packet attributes from the 811.Vt mbuf chain 812.Fa from 813to the 814.Vt mbuf 815.Fa to . 816The 817.Vt mbuf 818.Fa from 819must have the flag 820.Dv M_PKTHDR 821initially set, and 822.Fa to 823must be empty on entry. 824Upon the function's completion, 825.Fa from 826will have the flag 827.Dv M_PKTHDR 828and the per-packet attributes cleared. 829.\" 830.It Fn m_fixhdr mbuf 831Set the packet-header length to the length of the 832.Vt mbuf chain . 833.\" 834.It Fn m_devget buf len offset ifp copy 835Copy data from a device local memory pointed to by 836.Fa buf 837to an 838.Vt mbuf chain . 839The copy is done using a specified copy routine 840.Fa copy , 841or 842.Fn bcopy 843if 844.Fa copy 845is 846.Dv NULL . 847.\" 848.It Fn m_cat m n 849Concatenate 850.Fa n 851to 852.Fa m . 853Both 854.Vt mbuf chains 855must be of the same type. 
856.Fa n 857is still valid after the function returns. 858.Sy Note : 859It does not handle 860.Dv M_PKTHDR 861and friends. 862.\" 863.It Fn m_split mbuf len how 864Partition an 865.Vt mbuf chain 866in two pieces, returning the tail: 867all but the first 868.Fa len 869bytes. 870In case of failure, it returns 871.Dv NULL 872and attempts to restore the 873.Vt mbuf chain 874to its original state. 875.\" 876.It Fn m_apply mbuf off len f arg 877Apply a function to an 878.Vt mbuf chain , 879at offset 880.Fa off , 881for length 882.Fa len 883bytes. 884Typically used to avoid calls to 885.Fn m_pullup 886which would otherwise be unnecessary or undesirable. 887.Fa arg 888is a convenience argument which is passed to the callback function 889.Fa f . 890.Pp 891Each time 892.Fn f 893is called, it will be passed 894.Fa arg , 895a pointer to the 896.Fa data 897in the current mbuf, and the length 898.Fa len 899of the data in this mbuf to which the function should be applied. 900.Pp 901The function should return zero to indicate success; 902otherwise, if an error is indicated, then 903.Fn m_apply 904will return the error and stop iterating through the 905.Vt mbuf chain . 906.\" 907.It Fn m_getptr mbuf loc off 908Return a pointer to the mbuf containing the data located at 909.Fa loc 910bytes from the beginning of the 911.Vt mbuf chain . 912The corresponding offset into the mbuf will be stored in 913.Fa *off . 914.It Fn m_defrag m0 how 915Defragment an mbuf chain, returning the shortest possible 916chain of mbufs and clusters. 917If allocation fails and this cannot be completed, 918.Dv NULL 919will be returned and the original chain will be unchanged. 920Upon success, the original chain will be freed and the new 921chain will be returned. 922.Fa how 923should be either 924.Dv M_TRYWAIT 925or 926.Dv M_DONTWAIT , 927depending on the caller's preference. 
928.Pp 929This function is especially useful in network drivers, where 930certain long mbuf chains must be shortened before being added 931to TX descriptor lists. 932.It Fn m_unshare m0 how 933Create a version of the specified mbuf chain whose 934contents can be safely modified without affecting other users. 935If allocation fails and this operation can not be completed, 936.Dv NULL 937will be returned. 938The original mbuf chain is always reclaimed and the reference 939count of any shared mbuf clusters is decremented. 940.Fa how 941should be either 942.Dv M_TRYWAIT 943or 944.Dv M_DONTWAIT , 945depending on the caller's preference. 946As a side-effect of this process the returned 947mbuf chain may be compacted. 948.Pp 949This function is especially useful in the transmit path of 950network code, when data must be encrypted or otherwise 951altered prior to transmission. 952.El 953.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 954This section currently applies to TCP/IP only. 955In order to save the host CPU resources, computing checksums is 956offloaded to the network interface hardware if possible. 957The 958.Va m_pkthdr 959member of the leading 960.Vt mbuf 961of a packet contains two fields used for that purpose, 962.Vt int Va csum_flags 963and 964.Vt int Va csum_data . 965The meaning of those fields depends on the direction a packet flows in, 966and on whether the packet is fragmented. 967Henceforth, 968.Va csum_flags 969or 970.Va csum_data 971of a packet 972will denote the corresponding field of the 973.Va m_pkthdr 974member of the leading 975.Vt mbuf 976in the 977.Vt mbuf chain 978containing the packet. 979.Pp 980On output, checksum offloading is attempted after the outgoing 981interface has been determined for a packet. 982The interface-specific field 983.Va ifnet.if_data.ifi_hwassist 984(see 985.Xr ifnet 9 ) 986is consulted for the capabilities of the interface to assist in 987computing checksums. 
988The 989.Va csum_flags 990field of the packet header is set to indicate which actions the interface 991is supposed to perform on it. 992The actions unsupported by the network interface are done in the 993software prior to passing the packet down to the interface driver; 994such actions will never be requested through 995.Va csum_flags . 996.Pp 997The flags demanding a particular action from an interface are as follows: 998.Bl -tag -width ".Dv CSUM_TCP" -offset indent 999.It Dv CSUM_IP 1000The IP header checksum is to be computed and stored in the 1001corresponding field of the packet. 1002The hardware is expected to know the format of an IP header 1003to determine the offset of the IP checksum field. 1004.It Dv CSUM_TCP 1005The TCP checksum is to be computed. 1006(See below.) 1007.It Dv CSUM_UDP 1008The UDP checksum is to be computed. 1009(See below.) 1010.El 1011.Pp 1012Should a TCP or UDP checksum be offloaded to the hardware, 1013the field 1014.Va csum_data 1015will contain the byte offset of the checksum field relative to the 1016end of the IP header. 1017In this case, the checksum field will be initially 1018set by the TCP/IP module to the checksum of the pseudo header 1019defined by the TCP and UDP specifications. 1020.Pp 1021For outbound packets which have been fragmented 1022by the host CPU, the following will also be true, 1023regardless of the checksum flag settings: 1024.Bl -bullet -offset indent 1025.It 1026all fragments will have the flag 1027.Dv M_FRAG 1028set in their 1029.Va m_flags 1030field; 1031.It 1032the first and the last fragments in the chain will have 1033.Dv M_FIRSTFRAG 1034or 1035.Dv M_LASTFRAG 1036set in their 1037.Va m_flags , 1038correspondingly; 1039.It 1040the first fragment in the chain will have the total number 1041of fragments contained in its 1042.Va csum_data 1043field. 1044.El 1045.Pp 1046The last rule for fragmented packets takes precedence over the one 1047for a TCP or UDP checksum. 
1048Nevertheless, offloading a TCP or UDP checksum is possible for a 1049fragmented packet if the flag 1050.Dv CSUM_IP_FRAGS 1051is set in the field 1052.Va ifnet.if_data.ifi_hwassist 1053associated with the network interface. 1054However, in this case the interface is expected to figure out 1055the location of the checksum field within the sequence of fragments 1056by itself because 1057.Va csum_data 1058contains a fragment count instead of a checksum offset value. 1059.Pp 1060On input, an interface indicates the actions it has performed 1061on a packet by setting one or more of the following flags in 1062.Va csum_flags 1063associated with the packet: 1064.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1065.It Dv CSUM_IP_CHECKED 1066The IP header checksum has been computed. 1067.It Dv CSUM_IP_VALID 1068The IP header has a valid checksum. 1069This flag can appear only in combination with 1070.Dv CSUM_IP_CHECKED . 1071.It Dv CSUM_DATA_VALID 1072The checksum of the data portion of the IP packet has been computed 1073and stored in the field 1074.Va csum_data 1075in network byte order. 1076.It Dv CSUM_PSEUDO_HDR 1077Can be set only along with 1078.Dv CSUM_DATA_VALID 1079to indicate that the IP data checksum found in 1080.Va csum_data 1081allows for the pseudo header defined by the TCP and UDP specifications. 1082Otherwise the checksum of the pseudo header must be calculated by 1083the host CPU and added to 1084.Va csum_data 1085to obtain the final checksum to be used for TCP or UDP validation purposes. 1086.El 1087.Pp 1088If a particular network interface just indicates success or 1089failure of TCP or UDP checksum validation without returning 1090the exact value of the checksum to the host CPU, its driver can mark 1091.Dv CSUM_DATA_VALID 1092and 1093.Dv CSUM_PSEUDO_HDR 1094in 1095.Va csum_flags , 1096and set 1097.Va csum_data 1098to 1099.Li 0xFFFF 1100hexadecimal to indicate a valid checksum. 
1101It is a peculiarity of the algorithm used that the Internet checksum 1102calculated over any valid packet will be 1103.Li 0xFFFF 1104as long as the original checksum field is included. 1105.Pp 1106For inbound packets which are IP fragments, all 1107.Va csum_data 1108fields will be summed during reassembly to obtain the final checksum 1109value passed to an upper layer in the 1110.Va csum_data 1111field of the reassembled packet. 1112The 1113.Va csum_flags 1114fields of all fragments will be consolidated using logical AND 1115to obtain the final value for 1116.Va csum_flags . 1117Thus, in order to successfully 1118offload checksum computation for fragmented data, 1119all fragments should have the same value of 1120.Va csum_flags . 1121.Sh STRESS TESTING 1122When running a kernel compiled with the option 1123.Dv MBUF_STRESS_TEST , 1124the following 1125.Xr sysctl 8 Ns 1126-controlled options may be used to create 1127various failure/extreme cases for testing of network drivers 1128and other parts of the kernel that rely on 1129.Vt mbufs . 1130.Bl -tag -width indent 1131.It Va net.inet.ip.mbuf_frag_size 1132Causes 1133.Fn ip_output 1134to fragment outgoing 1135.Vt mbuf chains 1136into fragments of the specified size. 1137Setting this variable to 1 is an excellent way to 1138test the long 1139.Vt mbuf chain 1140handling ability of network drivers. 1141.It Va kern.ipc.m_defragrandomfailures 1142Causes the function 1143.Fn m_defrag 1144to randomly fail, returning 1145.Dv NULL . 1146Any piece of code which uses 1147.Fn m_defrag 1148should be tested with this feature. 1149.El 1150.Sh RETURN VALUES 1151See above. 1152.Sh SEE ALSO 1153.Xr ifnet 9 , 1154.Xr mbuf_tags 9 1155.Sh HISTORY 1156.\" Please correct me if I'm wrong 1157.Vt Mbufs 1158appeared in an early version of 1159.Bx . 1160Besides being used for network packets, they were used 1161to store various dynamic structures, such as routing table 1162entries, interface addresses, protocol control blocks, etc. 
1163In more recent 1164.Fx 1165use of 1166.Vt mbufs 1167is almost entirely limited to packet storage, with 1168.Xr uma 9 1169zones being used directly to store other network-related memory. 1170.Pp 1171Historically, the 1172.Vt mbuf 1173allocator has been a special-purpose memory allocator able to run in 1174interrupt contexts and allocating from a special kernel address space map. 1175As of 1176.Fx 5.3 , 1177the 1178.Vt mbuf 1179allocator is a wrapper around 1180.Xr uma 9 , 1181allowing caching of 1182.Vt mbufs , 1183clusters, and 1184.Vt mbuf 1185+ cluster pairs in per-CPU caches, as well as bringing other benefits of 1186slab allocation. 1187.Sh AUTHORS 1188The original 1189.Nm 1190manual page was written by Yar Tikhiy. 1191The 1192.Xr uma 9 1193.Vt mbuf 1194allocator was written by Bosko Milekic. 1195