1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd March 15, 2006 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 55.\" 56.Ss Mbuf utility macros 57.Fn mtod "struct mbuf *mbuf" "type" 58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 60.Ft int 61.Fn M_LEADINGSPACE "struct mbuf *mbuf" 62.Ft int 63.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 67.Ft int 68.Fn M_WRITABLE "struct mbuf *mbuf" 69.\" 70.Ss Mbuf allocation functions 71.Ft struct mbuf * 72.Fn m_get "int how" "int type" 73.Ft struct mbuf * 74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 75.Ft struct mbuf * 76.Fn m_getcl "int how" "short type" "int flags" 77.Ft struct mbuf * 78.Fn m_getclr "int how" "int type" 79.Ft struct mbuf * 80.Fn m_gethdr "int how" "int type" 81.Ft struct mbuf * 82.Fn m_free "struct mbuf *mbuf" 83.Ft void 84.Fn m_freem "struct mbuf *mbuf" 85.\" 86.Ss Mbuf utility functions 87.Ft void 88.Fn m_adj "struct mbuf *mbuf" "int len" 89.Ft void 90.Fn m_align "struct mbuf *mbuf" "int len" 91.Ft int 92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 97.Ft struct mbuf 
* 98.Fn m_pullup "struct mbuf *mbuf" "int len" 99.Ft struct mbuf * 100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 101.Ft struct mbuf * 102.Fn m_copypacket "struct mbuf *mbuf" "int how" 103.Ft struct mbuf * 104.Fn m_dup "struct mbuf *mbuf" "int how" 105.Ft void 106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft void 108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 109.Ft struct mbuf * 110.Fo m_devget 111.Fa "char *buf" 112.Fa "int len" 113.Fa "int offset" 114.Fa "struct ifnet *ifp" 115.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 116.Fc 117.Ft void 118.Fn m_cat "struct mbuf *m" "struct mbuf *n" 119.Ft u_int 120.Fn m_fixhdr "struct mbuf *mbuf" 121.Ft int 122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 123.Ft void 124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 125.Ft u_int 126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 127.Ft struct mbuf * 128.Fn m_split "struct mbuf *mbuf" "int len" "int how" 129.Ft int 130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 131.Ft struct mbuf * 132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 133.Ft struct mbuf * 134.Fn m_defrag "struct mbuf *m0" "int how" 135.Ft struct mbuf * 136.Fn m_unshare "struct mbuf *m0" "int how" 137.\" 138.Sh DESCRIPTION 139An 140.Vt mbuf 141is a basic unit of memory management in the kernel IPC subsystem. 142Network packets and socket buffers are stored in 143.Vt mbufs . 144A network packet may span multiple 145.Vt mbufs 146arranged into a 147.Vt mbuf chain 148(linked list), 149which allows adding or trimming 150network headers with little overhead. 151.Pp 152While a developer should not bother with 153.Vt mbuf 154internals without serious 155reason in order to avoid incompatibilities with future changes, it 156is useful to understand the general structure of an 157.Vt mbuf . 
158.Pp 159An 160.Vt mbuf 161consists of a variable-sized header and a small internal 162buffer for data. 163The total size of an 164.Vt mbuf , 165.Dv MSIZE , 166is a constant defined in 167.In sys/param.h . 168The 169.Vt mbuf 170header includes: 171.Pp 172.Bl -tag -width "m_nextpkt" -offset indent 173.It Va m_next 174.Pq Vt struct mbuf * 175A pointer to the next 176.Vt mbuf 177in the 178.Vt mbuf chain . 179.It Va m_nextpkt 180.Pq Vt struct mbuf * 181A pointer to the next 182.Vt mbuf chain 183in the queue. 184.It Va m_data 185.Pq Vt caddr_t 186A pointer to data attached to this 187.Vt mbuf . 188.It Va m_len 189.Pq Vt int 190The length of the data. 191.It Va m_type 192.Pq Vt short 193The type of the data. 194.It Va m_flags 195.Pq Vt int 196The 197.Vt mbuf 198flags. 199.El 200.Pp 201The 202.Vt mbuf 203flag bits are defined as follows: 204.Bd -literal 205/* mbuf flags */ 206#define M_EXT 0x0001 /* has associated external storage */ 207#define M_PKTHDR 0x0002 /* start of record */ 208#define M_EOR 0x0004 /* end of record */ 209#define M_RDONLY 0x0008 /* associated data marked read-only */ 210#define M_PROTO1 0x0010 /* protocol-specific */ 211#define M_PROTO2 0x0020 /* protocol-specific */ 212#define M_PROTO3 0x0040 /* protocol-specific */ 213#define M_PROTO4 0x0080 /* protocol-specific */ 214#define M_PROTO5 0x0100 /* protocol-specific */ 215#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 216#define M_FREELIST 0x8000 /* mbuf is on the free list */ 217 218/* mbuf pkthdr flags (also stored in m_flags) */ 219#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 220#define M_MCAST 0x0400 /* send/received as link-level multicast */ 221#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 222#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 223#define M_LASTFRAG 0x2000 /* packet is last fragment */ 224.Ed 225.Pp 226The available 227.Vt mbuf 228types are defined as follows: 229.Bd -literal 230/* mbuf types */ 231#define 
MT_DATA 1 /* dynamic (data) allocation */ 232#define MT_HEADER 2 /* packet header */ 233#define MT_SONAME 8 /* socket name */ 234#define MT_FTABLE 11 /* fragment reassembly header */ 235#define MT_CONTROL 14 /* extra-data protocol message */ 236#define MT_OOBDATA 15 /* expedited data */ 237.Ed 238.Pp 239If the 240.Dv M_PKTHDR 241flag is set, a 242.Vt struct pkthdr Va m_pkthdr 243is added to the 244.Vt mbuf 245header. 246It contains a pointer to the interface 247the packet has been received from 248.Pq Vt struct ifnet Va *rcvif , 249and the total packet length 250.Pq Vt int Va len . 251Optionally, it may also contain an attached list of packet tags 252.Pq Vt "struct m_tag" . 253See 254.Xr mbuf_tags 9 255for details. 256Fields used in offloading checksum calculation to the hardware are kept in 257.Va m_pkthdr 258as well. 259See 260.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 261for details. 262.Pp 263If small enough, data is stored in the internal data buffer of an 264.Vt mbuf . 265If the data is sufficiently large, another 266.Vt mbuf 267may be added to the 268.Vt mbuf chain , 269or external storage may be associated with the 270.Vt mbuf . 271.Dv MHLEN 272bytes of data can fit into an 273.Vt mbuf 274with the 275.Dv M_PKTHDR 276flag set, 277.Dv MLEN 278bytes can otherwise. 279.Pp 280If external storage is being associated with an 281.Vt mbuf , 282the 283.Va m_ext 284header is added at the cost of losing the internal data buffer. 285It includes a pointer to external storage, the size of the storage, 286a pointer to a function used for freeing the storage, 287a pointer to an optional argument that can be passed to the function, 288and a pointer to a reference counter. 289An 290.Vt mbuf 291using external storage has the 292.Dv M_EXT 293flag set. 294.Pp 295The system supplies a macro for allocating the desired external storage 296buffer, 297.Dv MEXTADD . 298.Pp 299The allocation and management of the reference counter is handled by the 300subsystem. 
301.Pp 302The system also supplies a default type of external storage buffer called an 303.Vt mbuf cluster . 304.Vt Mbuf clusters 305can be allocated and configured with the use of the 306.Dv MCLGET 307macro. 308Each 309.Vt mbuf cluster 310is 311.Dv MCLBYTES 312in size, where MCLBYTES is a machine-dependent constant. 313The system defines an advisory macro 314.Dv MINCLSIZE , 315which is the smallest amount of data to put into an 316.Vt mbuf cluster . 317It is equal to the sum of 318.Dv MLEN 319and 320.Dv MHLEN . 321It is typically preferable to store data into the data region of an 322.Vt mbuf , 323if size permits, as opposed to allocating a separate 324.Vt mbuf cluster 325to hold the same data. 326.\" 327.Ss Macros and Functions 328There are numerous predefined macros and functions that provide the 329developer with common utilities. 330.\" 331.Bl -ohang -offset indent 332.It Fn mtod mbuf type 333Convert an 334.Fa mbuf 335pointer to a data pointer. 336The macro expands to the data pointer cast to the pointer of the specified 337.Fa type . 338.Sy Note : 339It is advisable to ensure that there is enough contiguous data in 340.Fa mbuf . 341See 342.Fn m_pullup 343for details. 344.It Fn MGET mbuf how type 345Allocate an 346.Vt mbuf 347and initialize it to contain internal data. 348.Fa mbuf 349will point to the allocated 350.Vt mbuf 351on success, or be set to 352.Dv NULL 353on failure. 354The 355.Fa how 356argument is to be set to 357.Dv M_TRYWAIT 358or 359.Dv M_DONTWAIT . 360It specifies whether the caller is willing to block if necessary. 361If 362.Fa how 363is set to 364.Dv M_TRYWAIT , 365a failed allocation will result in the caller being put 366to sleep for a designated 367kern.ipc.mbuf_wait 368.Xr ( sysctl 8 369tunable) 370number of ticks. 371A number of other functions and macros related to 372.Vt mbufs 373have the same argument because they may 374at some point need to allocate new 375.Vt mbufs . 
376.Pp 377Programmers should be careful not to confuse the 378.Vt mbuf 379allocation flag 380.Dv M_DONTWAIT 381with the 382.Xr malloc 9 383allocation flag, 384.Dv M_NOWAIT . 385They are not the same. 386.It Fn MGETHDR mbuf how type 387Allocate an 388.Vt mbuf 389and initialize it to contain a packet header 390and internal data. 391See 392.Fn MGET 393for details. 394.It Fn MCLGET mbuf how 395Allocate and attach an 396.Vt mbuf cluster 397to 398.Fa mbuf . 399If the macro fails, the 400.Dv M_EXT 401flag will not be set in 402.Fa mbuf . 403.It Fn M_ALIGN mbuf len 404Set the pointer 405.Fa mbuf->m_data 406to place an object of the size 407.Fa len 408at the end of the internal data area of 409.Fa mbuf , 410long word aligned. 411Applicable only if 412.Fa mbuf 413is newly allocated with 414.Fn MGET 415or 416.Fn m_get . 417.It Fn MH_ALIGN mbuf len 418Serves the same purpose as 419.Fn M_ALIGN 420does, but only for 421.Fa mbuf 422newly allocated with 423.Fn MGETHDR 424or 425.Fn m_gethdr , 426or initialized by 427.Fn m_dup_pkthdr 428or 429.Fn m_move_pkthdr . 430.It Fn m_align mbuf len 431Serves the same purpose as 432.Fn M_ALIGN 433but handles any type of mbuf. 434.It Fn M_LEADINGSPACE mbuf 435Returns the number of bytes available before the beginning 436of data in 437.Fa mbuf . 438.It Fn M_TRAILINGSPACE mbuf 439Returns the number of bytes available after the end of data in 440.Fa mbuf . 441.It Fn M_PREPEND mbuf len how 442This macro operates on an 443.Vt mbuf chain . 444It is an optimized wrapper for 445.Fn m_prepend 446that can make use of possible empty space before data 447(e.g.\& left after trimming of a link-layer header). 448The new 449.Vt mbuf chain 450pointer or 451.Dv NULL 452is in 453.Fa mbuf 454after the call. 455.It Fn M_MOVE_PKTHDR to from 456Using this macro is equivalent to calling 457.Fn m_move_pkthdr to from . 
458.It Fn M_WRITABLE mbuf 459This macro will evaluate true if 460.Fa mbuf 461is not marked 462.Dv M_RDONLY 463and if either 464.Fa mbuf 465does not contain external storage or, 466if it does, 467then if the reference count of the storage is not greater than 1. 468The 469.Dv M_RDONLY 470flag can be set in 471.Fa mbuf->m_flags . 472This can be achieved during setup of the external storage, 473by passing the 474.Dv M_RDONLY 475bit as a 476.Fa flags 477argument to the 478.Fn MEXTADD 479macro, or can be directly set in individual 480.Vt mbufs . 481.It Fn MCHTYPE mbuf type 482Change the type of 483.Fa mbuf 484to 485.Fa type . 486This is a relatively expensive operation and should be avoided. 487.El 488.Pp 489The functions are: 490.Bl -ohang -offset indent 491.It Fn m_get how type 492A function version of 493.Fn MGET 494for non-critical paths. 495.It Fn m_getm orig len how type 496Allocate 497.Fa len 498bytes worth of 499.Vt mbufs 500and 501.Vt mbuf clusters 502if necessary and append the resulting allocated 503.Vt mbuf chain 504to the 505.Vt mbuf chain 506.Fa orig , 507if it is 508.No non- Ns Dv NULL . 509If the allocation fails at any point, 510free whatever was allocated and return 511.Dv NULL . 512If 513.Fa orig 514is 515.No non- Ns Dv NULL , 516it will not be freed. 517It is possible to use 518.Fn m_getm 519to either append 520.Fa len 521bytes to an existing 522.Vt mbuf 523or 524.Vt mbuf chain 525(for example, one which may be sitting in a pre-allocated ring) 526or to simply perform an all-or-nothing 527.Vt mbuf 528and 529.Vt mbuf cluster 530allocation. 531.It Fn m_gethdr how type 532A function version of 533.Fn MGETHDR 534for non-critical paths. 535.It Fn m_getcl how type flags 536Fetch an 537.Vt mbuf 538with a 539.Vt mbuf cluster 540attached to it. 541If one of the allocations fails, the entire allocation fails. 
542This routine is the preferred way of fetching both the 543.Vt mbuf 544and 545.Vt mbuf cluster 546together, as it avoids having to unlock/relock between allocations. 547Returns 548.Dv NULL 549on failure. 550.It Fn m_getclr how type 551Allocate an 552.Vt mbuf 553and zero out the data region. 554.It Fn m_free mbuf 555Frees 556.Fa mbuf . 557Returns 558.Va m_next 559of the freed 560.Vt mbuf . 561.El 562.Pp 563The functions below operate on 564.Vt mbuf chains . 565.Bl -ohang -offset indent 566.It Fn m_freem mbuf 567Free an entire 568.Vt mbuf chain , 569including any external storage. 570.\" 571.It Fn m_adj mbuf len 572Trim 573.Fa len 574bytes from the head of an 575.Vt mbuf chain 576if 577.Fa len 578is positive, from the tail otherwise. 579.\" 580.It Fn m_append mbuf len cp 581Append 582.Fa len 583bytes of data 584.Fa cp 585to the 586.Vt mbuf chain . 587Extend the mbuf chain if the new data does not fit in 588existing space. 589.\" 590.It Fn m_prepend mbuf len how 591Allocate a new 592.Vt mbuf 593and prepend it to the 594.Vt mbuf chain , 595handling 596.Dv M_PKTHDR 597properly. 598.Sy Note : 599It does not allocate any 600.Vt mbuf clusters , 601so 602.Fa len 603must be less than 604.Dv MLEN 605or 606.Dv MHLEN , 607depending on the 608.Dv M_PKTHDR 609flag setting. 610.\" 611.It Fn m_copyup mbuf len dstoff 612Similar to 613.Fn m_pullup 614but copies 615.Fa len 616bytes of data into a new mbuf at 617.Fa dstoff 618bytes into the mbuf. 619The 620.Fa dstoff 621argument aligns the data and leaves room for a link layer header. 622Returns the new 623.Vt mbuf chain 624on success, 625and frees the 626.Vt mbuf chain 627and returns 628.Dv NULL 629on failure. 630.Sy Note : 631The function does not allocate 632.Vt mbuf clusters , 633so 634.Fa len + dstoff 635must be less than 636.Dv MHLEN . 
637.\" 638.It Fn m_pullup mbuf len 639Arrange that the first 640.Fa len 641bytes of an 642.Vt mbuf chain 643are contiguous and lie in the data area of 644.Fa mbuf , 645so they are accessible with 646.Fn mtod mbuf type . 647Return the new 648.Vt mbuf chain 649on success, 650.Dv NULL 651on failure 652(the 653.Vt mbuf chain 654is freed in this case). 655.Sy Note : 656It does not allocate any 657.Vt mbuf clusters , 658so 659.Fa len 660must be less than 661.Dv MHLEN . 662.\" 663.It Fn m_copym mbuf offset len how 664Make a copy of an 665.Vt mbuf chain 666starting 667.Fa offset 668bytes from the beginning, continuing for 669.Fa len 670bytes. 671If 672.Fa len 673is 674.Dv M_COPYALL , 675copy to the end of the 676.Vt mbuf chain . 677.Sy Note : 678The copy is read-only, because the 679.Vt mbuf clusters 680are not copied, only their reference counts are incremented. 681.\" 682.It Fn m_copypacket mbuf how 683Copy an entire packet including header, which must be present. 684This is an optimized version of the common case 685.Fn m_copym mbuf 0 M_COPYALL how . 686.Sy Note : 687The copy is read-only, because the 688.Vt mbuf clusters 689are not copied, only their reference counts are incremented. 690.\" 691.It Fn m_dup mbuf how 692Copy a packet header 693.Vt mbuf chain 694into a completely new 695.Vt mbuf chain , 696including copying any 697.Vt mbuf clusters . 698Use this instead of 699.Fn m_copypacket 700when you need a writable copy of an 701.Vt mbuf chain . 702.\" 703.It Fn m_copydata mbuf offset len buf 704Copy data from an 705.Vt mbuf chain 706starting 707.Fa offset 708bytes from the beginning, continuing for 709.Fa len 710bytes, into the indicated buffer 711.Fa buf . 712.\" 713.It Fn m_copyback mbuf offset len buf 714Copy 715.Fa len 716bytes from the buffer 717.Fa buf 718back into the indicated 719.Vt mbuf chain , 720starting at 721.Fa offset 722bytes from the beginning of the 723.Vt mbuf chain , 724extending the 725.Vt mbuf chain 726if necessary. 
727.Sy Note : 728It does not allocate any 729.Vt mbuf clusters , 730just adds 731.Vt mbufs 732to the 733.Vt mbuf chain . 734It is safe to set 735.Fa offset 736beyond the current 737.Vt mbuf chain 738end: zeroed 739.Vt mbufs 740will be allocated to fill the space. 741.\" 742.It Fn m_length mbuf last 743Return the length of the 744.Vt mbuf chain , 745and optionally a pointer to the last 746.Vt mbuf . 747.\" 748.It Fn m_dup_pkthdr to from how 749Upon the function's completion, the 750.Vt mbuf 751.Fa to 752will contain an identical copy of 753.Fa from->m_pkthdr 754and the per-packet attributes found in the 755.Vt mbuf chain 756.Fa from . 757The 758.Vt mbuf 759.Fa from 760must have the flag 761.Dv M_PKTHDR 762initially set, and 763.Fa to 764must be empty on entry. 765.\" 766.It Fn m_move_pkthdr to from 767Move 768.Va m_pkthdr 769and the per-packet attributes from the 770.Vt mbuf chain 771.Fa from 772to the 773.Vt mbuf 774.Fa to . 775The 776.Vt mbuf 777.Fa from 778must have the flag 779.Dv M_PKTHDR 780initially set, and 781.Fa to 782must be empty on entry. 783Upon the function's completion, 784.Fa from 785will have the flag 786.Dv M_PKTHDR 787and the per-packet attributes cleared. 788.\" 789.It Fn m_fixhdr mbuf 790Set the packet-header length to the length of the 791.Vt mbuf chain . 792.\" 793.It Fn m_devget buf len offset ifp copy 794Copy data from device local memory pointed to by 795.Fa buf 796to an 797.Vt mbuf chain . 798The copy is done using a specified copy routine 799.Fa copy , 800or 801.Fn bcopy 802if 803.Fa copy 804is 805.Dv NULL . 806.\" 807.It Fn m_cat m n 808Concatenate 809.Fa n 810to 811.Fa m . 812Both 813.Vt mbuf chains 814must be of the same type. 815.Fa n 816is not guaranteed to be valid after the function returns. 817.Sy Note : 818It does not handle 819.Dv M_PKTHDR 820and friends. 821.\" 822.It Fn m_split mbuf len how 823Partition an 824.Vt mbuf chain 825in two pieces, returning the tail: 826all but the first 827.Fa len 828bytes. 
829In case of failure, it returns 830.Dv NULL 831and attempts to restore the 832.Vt mbuf chain 833to its original state. 834.\" 835.It Fn m_apply mbuf off len f arg 836Apply a function to an 837.Vt mbuf chain , 838at offset 839.Fa off , 840for length 841.Fa len 842bytes. 843Typically used to avoid calls to 844.Fn m_pullup 845which would otherwise be unnecessary or undesirable. 846.Fa arg 847is a convenience argument which is passed to the callback function 848.Fa f . 849.Pp 850Each time 851.Fn f 852is called, it will be passed 853.Fa arg , 854a pointer to the 855.Fa data 856in the current mbuf, and the length 857.Fa len 858of the data in this mbuf to which the function should be applied. 859.Pp 860The function should return zero to indicate success; 861otherwise, if an error is indicated, then 862.Fn m_apply 863will return the error and stop iterating through the 864.Vt mbuf chain . 865.\" 866.It Fn m_getptr mbuf loc off 867Return a pointer to the mbuf containing the data located at 868.Fa loc 869bytes from the beginning of the 870.Vt mbuf chain . 871The corresponding offset into the mbuf will be stored in 872.Fa *off . 873.It Fn m_defrag m0 how 874Defragment an mbuf chain, returning the shortest possible 875chain of mbufs and clusters. 876If allocation fails and this can not be completed, 877.Dv NULL 878will be returned and the original chain will be unchanged. 879Upon success, the original chain will be freed and the new 880chain will be returned. 881.Fa how 882should be either 883.Dv M_TRYWAIT 884or 885.Dv M_DONTWAIT , 886depending on the caller's preference. 887.Pp 888This function is especially useful in network drivers, where 889certain long mbuf chains must be shortened before being added 890to TX descriptor lists. 891.It Fn m_unshare m0 how 892Create a version of the specified mbuf chain whose 893contents can be safely modified without affecting other users. 894If allocation fails and this operation can not be completed, 895.Dv NULL 896will be returned. 
897The original mbuf chain is always reclaimed and the reference 898count of any shared mbuf clusters is decremented. 899.Fa how 900should be either 901.Dv M_TRYWAIT 902or 903.Dv M_DONTWAIT , 904depending on the caller's preference. 905As a side-effect of this process the returned 906mbuf chain may be compacted. 907.Pp 908This function is especially useful in the transmit path of 909network code, when data must be encrypted or otherwise 910altered prior to transmission. 911.El 912.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 913This section currently applies to TCP/IP only. 914In order to save the host CPU resources, computing checksums is 915offloaded to the network interface hardware if possible. 916The 917.Va m_pkthdr 918member of the leading 919.Vt mbuf 920of a packet contains two fields used for that purpose, 921.Vt int Va csum_flags 922and 923.Vt int Va csum_data . 924The meaning of those fields depends on the direction a packet flows in, 925and on whether the packet is fragmented. 926Henceforth, 927.Va csum_flags 928or 929.Va csum_data 930of a packet 931will denote the corresponding field of the 932.Va m_pkthdr 933member of the leading 934.Vt mbuf 935in the 936.Vt mbuf chain 937containing the packet. 938.Pp 939On output, checksum offloading is attempted after the outgoing 940interface has been determined for a packet. 941The interface-specific field 942.Va ifnet.if_data.ifi_hwassist 943(see 944.Xr ifnet 9 ) 945is consulted for the capabilities of the interface to assist in 946computing checksums. 947The 948.Va csum_flags 949field of the packet header is set to indicate which actions the interface 950is supposed to perform on it. 951The actions unsupported by the network interface are done in the 952software prior to passing the packet down to the interface driver; 953such actions will never be requested through 954.Va csum_flags . 
955.Pp 956The flags demanding a particular action from an interface are as follows: 957.Bl -tag -width ".Dv CSUM_TCP" -offset indent 958.It Dv CSUM_IP 959The IP header checksum is to be computed and stored in the 960corresponding field of the packet. 961The hardware is expected to know the format of an IP header 962to determine the offset of the IP checksum field. 963.It Dv CSUM_TCP 964The TCP checksum is to be computed. 965(See below.) 966.It Dv CSUM_UDP 967The UDP checksum is to be computed. 968(See below.) 969.El 970.Pp 971Should a TCP or UDP checksum be offloaded to the hardware, 972the field 973.Va csum_data 974will contain the byte offset of the checksum field relative to the 975end of the IP header. 976In this case, the checksum field will be initially 977set by the TCP/IP module to the checksum of the pseudo header 978defined by the TCP and UDP specifications. 979.Pp 980For outbound packets which have been fragmented 981by the host CPU, the following will also be true, 982regardless of the checksum flag settings: 983.Bl -bullet -offset indent 984.It 985all fragments will have the flag 986.Dv M_FRAG 987set in their 988.Va m_flags 989field; 990.It 991the first and the last fragments in the chain will have 992.Dv M_FIRSTFRAG 993or 994.Dv M_LASTFRAG 995set in their 996.Va m_flags , 997correspondingly; 998.It 999the first fragment in the chain will have the total number 1000of fragments contained in its 1001.Va csum_data 1002field. 1003.El 1004.Pp 1005The last rule for fragmented packets takes precedence over the one 1006for a TCP or UDP checksum. 1007Nevertheless, offloading a TCP or UDP checksum is possible for a 1008fragmented packet if the flag 1009.Dv CSUM_IP_FRAGS 1010is set in the field 1011.Va ifnet.if_data.ifi_hwassist 1012associated with the network interface. 
1013However, in this case the interface is expected to figure out 1014the location of the checksum field within the sequence of fragments 1015by itself because 1016.Va csum_data 1017contains a fragment count instead of a checksum offset value. 1018.Pp 1019On input, an interface indicates the actions it has performed 1020on a packet by setting one or more of the following flags in 1021.Va csum_flags 1022associated with the packet: 1023.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1024.It Dv CSUM_IP_CHECKED 1025The IP header checksum has been computed. 1026.It Dv CSUM_IP_VALID 1027The IP header has a valid checksum. 1028This flag can appear only in combination with 1029.Dv CSUM_IP_CHECKED . 1030.It Dv CSUM_DATA_VALID 1031The checksum of the data portion of the IP packet has been computed 1032and stored in the field 1033.Va csum_data 1034in network byte order. 1035.It Dv CSUM_PSEUDO_HDR 1036Can be set only along with 1037.Dv CSUM_DATA_VALID 1038to indicate that the IP data checksum found in 1039.Va csum_data 1040allows for the pseudo header defined by the TCP and UDP specifications. 1041Otherwise the checksum of the pseudo header must be calculated by 1042the host CPU and added to 1043.Va csum_data 1044to obtain the final checksum to be used for TCP or UDP validation purposes. 1045.El 1046.Pp 1047If a particular network interface just indicates success or 1048failure of TCP or UDP checksum validation without returning 1049the exact value of the checksum to the host CPU, its driver can mark 1050.Dv CSUM_DATA_VALID 1051and 1052.Dv CSUM_PSEUDO_HDR 1053in 1054.Va csum_flags , 1055and set 1056.Va csum_data 1057to 1058.Li 0xFFFF 1059hexadecimal to indicate a valid checksum. 1060It is a peculiarity of the algorithm used that the Internet checksum 1061calculated over any valid packet will be 1062.Li 0xFFFF 1063as long as the original checksum field is included. 
1064.Pp 1065For inbound packets which are IP fragments, all 1066.Va csum_data 1067fields will be summed during reassembly to obtain the final checksum 1068value passed to an upper layer in the 1069.Va csum_data 1070field of the reassembled packet. 1071The 1072.Va csum_flags 1073fields of all fragments will be consolidated using logical AND 1074to obtain the final value for 1075.Va csum_flags . 1076Thus, in order to successfully 1077offload checksum computation for fragmented data, 1078all fragments should have the same value of 1079.Va csum_flags . 1080.Sh STRESS TESTING 1081When running a kernel compiled with the option 1082.Dv MBUF_STRESS_TEST , 1083the following 1084.Xr sysctl 8 Ns 1085-controlled options may be used to create 1086various failure/extreme cases for testing of network drivers 1087and other parts of the kernel that rely on 1088.Vt mbufs . 1089.Bl -tag -width ident 1090.It Va net.inet.ip.mbuf_frag_size 1091Causes 1092.Fn ip_output 1093to fragment outgoing 1094.Vt mbuf chains 1095into fragments of the specified size. 1096Setting this variable to 1 is an excellent way to 1097test the long 1098.Vt mbuf chain 1099handling ability of network drivers. 1100.It Va kern.ipc.m_defragrandomfailures 1101Causes the function 1102.Fn m_defrag 1103to randomly fail, returning 1104.Dv NULL . 1105Any piece of code which uses 1106.Fn m_defrag 1107should be tested with this feature. 1108.El 1109.Sh RETURN VALUES 1110See above. 1111.Sh SEE ALSO 1112.Xr ifnet 9 , 1113.Xr mbuf_tags 9 1114.Sh HISTORY 1115.\" Please correct me if I'm wrong 1116.Vt Mbufs 1117appeared in an early version of 1118.Bx . 1119Besides being used for network packets, they were used 1120to store various dynamic structures, such as routing table 1121entries, interface addresses, protocol control blocks, etc. 1122.Sh AUTHORS 1123The original 1124.Nm 1125manual page was written by Yar Tikhiy. 1126