1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd July 24, 2006 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 55.\" 56.Ss Mbuf utility macros 57.Fn mtod "struct mbuf *mbuf" "type" 58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 60.Ft int 61.Fn M_LEADINGSPACE "struct mbuf *mbuf" 62.Ft int 63.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 67.Ft int 68.Fn M_WRITABLE "struct mbuf *mbuf" 69.\" 70.Ss Mbuf allocation functions 71.Ft struct mbuf * 72.Fn m_get "int how" "int type" 73.Ft struct mbuf * 74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 75.Ft struct mbuf * 76.Fn m_getcl "int how" "short type" "int flags" 77.Ft struct mbuf * 78.Fn m_getclr "int how" "int type" 79.Ft struct mbuf * 80.Fn m_gethdr "int how" "int type" 81.Ft struct mbuf * 82.Fn m_free "struct mbuf *mbuf" 83.Ft void 84.Fn m_freem "struct mbuf *mbuf" 85.\" 86.Ss Mbuf utility functions 87.Ft void 88.Fn m_adj "struct mbuf *mbuf" "int len" 89.Ft void 90.Fn m_align "struct mbuf *mbuf" "int len" 91.Ft int 92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 97.Ft struct mbuf 
* 98.Fn m_pullup "struct mbuf *mbuf" "int len" 99.Ft struct mbuf * 100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 101.Ft struct mbuf * 102.Fn m_copypacket "struct mbuf *mbuf" "int how" 103.Ft struct mbuf * 104.Fn m_dup "struct mbuf *mbuf" "int how" 105.Ft void 106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft void 108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 109.Ft struct mbuf * 110.Fo m_devget 111.Fa "char *buf" 112.Fa "int len" 113.Fa "int offset" 114.Fa "struct ifnet *ifp" 115.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 116.Fc 117.Ft void 118.Fn m_cat "struct mbuf *m" "struct mbuf *n" 119.Ft u_int 120.Fn m_fixhdr "struct mbuf *mbuf" 121.Ft void 122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 123.Ft void 124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 125.Ft u_int 126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 127.Ft struct mbuf * 128.Fn m_split "struct mbuf *mbuf" "int len" "int how" 129.Ft int 130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 131.Ft struct mbuf * 132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 133.Ft struct mbuf * 134.Fn m_defrag "struct mbuf *m0" "int how" 135.Ft struct mbuf * 136.Fn m_unshare "struct mbuf *m0" "int how" 137.\" 138.Sh DESCRIPTION 139An 140.Vt mbuf 141is a basic unit of memory management in the kernel IPC subsystem. 142Network packets and socket buffers are stored in 143.Vt mbufs . 144A network packet may span multiple 145.Vt mbufs 146arranged into a 147.Vt mbuf chain 148(linked list), 149which allows adding or trimming 150network headers with little overhead. 151.Pp 152While a developer should not bother with 153.Vt mbuf 154internals without serious 155reason in order to avoid incompatibilities with future changes, it 156is useful to understand the general structure of an 157.Vt mbuf . 
158.Pp 159An 160.Vt mbuf 161consists of a variable-sized header and a small internal 162buffer for data. 163The total size of an 164.Vt mbuf , 165.Dv MSIZE , 166is a constant defined in 167.In sys/param.h . 168The 169.Vt mbuf 170header includes: 171.Pp 172.Bl -tag -width "m_nextpkt" -offset indent 173.It Va m_next 174.Pq Vt struct mbuf * 175A pointer to the next 176.Vt mbuf 177in the 178.Vt mbuf chain . 179.It Va m_nextpkt 180.Pq Vt struct mbuf * 181A pointer to the next 182.Vt mbuf chain 183in the queue. 184.It Va m_data 185.Pq Vt caddr_t 186A pointer to data attached to this 187.Vt mbuf . 188.It Va m_len 189.Pq Vt int 190The length of the data. 191.It Va m_type 192.Pq Vt short 193The type of the data. 194.It Va m_flags 195.Pq Vt int 196The 197.Vt mbuf 198flags. 199.El 200.Pp 201The 202.Vt mbuf 203flag bits are defined as follows: 204.Bd -literal 205/* mbuf flags */ 206#define M_EXT 0x0001 /* has associated external storage */ 207#define M_PKTHDR 0x0002 /* start of record */ 208#define M_EOR 0x0004 /* end of record */ 209#define M_RDONLY 0x0008 /* associated data marked read-only */ 210#define M_PROTO1 0x0010 /* protocol-specific */ 211#define M_PROTO2 0x0020 /* protocol-specific */ 212#define M_PROTO3 0x0040 /* protocol-specific */ 213#define M_PROTO4 0x0080 /* protocol-specific */ 214#define M_PROTO5 0x0100 /* protocol-specific */ 215#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 216#define M_FREELIST 0x8000 /* mbuf is on the free list */ 217 218/* mbuf pkthdr flags (also stored in m_flags) */ 219#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 220#define M_MCAST 0x0400 /* send/received as link-level multicast */ 221#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 222#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 223#define M_LASTFRAG 0x2000 /* packet is last fragment */ 224.Ed 225.Pp 226The available 227.Vt mbuf 228types are defined as follows: 229.Bd -literal 230/* mbuf types */ 231#define 
MT_DATA 1 /* dynamic (data) allocation */ 232#define MT_HEADER MT_DATA /* packet header */ 233#define MT_SONAME 8 /* socket name */ 234#define MT_CONTROL 14 /* extra-data protocol message */ 235#define MT_OOBDATA 15 /* expedited data */ 236.Ed 237.Pp 238If the 239.Dv M_PKTHDR 240flag is set, a 241.Vt struct pkthdr Va m_pkthdr 242is added to the 243.Vt mbuf 244header. 245It contains a pointer to the interface 246the packet has been received from 247.Pq Vt struct ifnet Va *rcvif , 248and the total packet length 249.Pq Vt int Va len . 250Optionally, it may also contain an attached list of packet tags 251.Pq Vt "struct m_tag" . 252See 253.Xr mbuf_tags 9 254for details. 255Fields used in offloading checksum calculation to the hardware are kept in 256.Va m_pkthdr 257as well. 258See 259.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 260for details. 261.Pp 262If small enough, data is stored in the internal data buffer of an 263.Vt mbuf . 264If the data is sufficiently large, another 265.Vt mbuf 266may be added to the 267.Vt mbuf chain , 268or external storage may be associated with the 269.Vt mbuf . 270.Dv MHLEN 271bytes of data can fit into an 272.Vt mbuf 273with the 274.Dv M_PKTHDR 275flag set, 276.Dv MLEN 277bytes can otherwise. 278.Pp 279If external storage is being associated with an 280.Vt mbuf , 281the 282.Va m_ext 283header is added at the cost of losing the internal data buffer. 284It includes a pointer to external storage, the size of the storage, 285a pointer to a function used for freeing the storage, 286a pointer to an optional argument that can be passed to the function, 287and a pointer to a reference counter. 288An 289.Vt mbuf 290using external storage has the 291.Dv M_EXT 292flag set. 293.Pp 294The system supplies a macro for allocating the desired external storage 295buffer, 296.Dv MEXTADD . 297.Pp 298The allocation and management of the reference counter is handled by the 299subsystem. 
300.Pp 301The system also supplies a default type of external storage buffer called an 302.Vt mbuf cluster . 303.Vt Mbuf clusters 304can be allocated and configured with the use of the 305.Dv MCLGET 306macro. 307Each 308.Vt mbuf cluster 309is 310.Dv MCLBYTES 311in size, where MCLBYTES is a machine-dependent constant. 312The system defines an advisory macro 313.Dv MINCLSIZE , 314which is the smallest amount of data to put into an 315.Vt mbuf cluster . 316It is equal to the sum of 317.Dv MLEN 318and 319.Dv MHLEN . 320It is typically preferable to store data into the data region of an 321.Vt mbuf , 322if size permits, as opposed to allocating a separate 323.Vt mbuf cluster 324to hold the same data. 325.\" 326.Ss Macros and Functions 327There are numerous predefined macros and functions that provide the 328developer with common utilities. 329.\" 330.Bl -ohang -offset indent 331.It Fn mtod mbuf type 332Convert an 333.Fa mbuf 334pointer to a data pointer. 335The macro expands to the data pointer cast to the pointer of the specified 336.Fa type . 337.Sy Note : 338It is advisable to ensure that there is enough contiguous data in 339.Fa mbuf . 340See 341.Fn m_pullup 342for details. 343.It Fn MGET mbuf how type 344Allocate an 345.Vt mbuf 346and initialize it to contain internal data. 347.Fa mbuf 348will point to the allocated 349.Vt mbuf 350on success, or be set to 351.Dv NULL 352on failure. 353The 354.Fa how 355argument is to be set to 356.Dv M_TRYWAIT 357or 358.Dv M_DONTWAIT . 359It specifies whether the caller is willing to block if necessary. 360If 361.Fa how 362is set to 363.Dv M_TRYWAIT , 364a failed allocation will result in the caller being put 365to sleep for a designated 366kern.ipc.mbuf_wait 367.Xr ( sysctl 8 368tunable) 369number of ticks. 370A number of other functions and macros related to 371.Vt mbufs 372have the same argument because they may 373at some point need to allocate new 374.Vt mbufs . 
375.Pp 376Programmers should be careful not to confuse the 377.Vt mbuf 378allocation flag 379.Dv M_DONTWAIT 380with the 381.Xr malloc 9 382allocation flag, 383.Dv M_NOWAIT . 384They are not the same. 385.It Fn MGETHDR mbuf how type 386Allocate an 387.Vt mbuf 388and initialize it to contain a packet header 389and internal data. 390See 391.Fn MGET 392for details. 393.It Fn MCLGET mbuf how 394Allocate and attach an 395.Vt mbuf cluster 396to 397.Fa mbuf . 398If the macro fails, the 399.Dv M_EXT 400flag will not be set in 401.Fa mbuf . 402.It Fn M_ALIGN mbuf len 403Set the pointer 404.Fa mbuf->m_data 405to place an object of the size 406.Fa len 407at the end of the internal data area of 408.Fa mbuf , 409long word aligned. 410Applicable only if 411.Fa mbuf 412is newly allocated with 413.Fn MGET 414or 415.Fn m_get . 416.It Fn MH_ALIGN mbuf len 417Serves the same purpose as 418.Fn M_ALIGN 419does, but only for 420.Fa mbuf 421newly allocated with 422.Fn MGETHDR 423or 424.Fn m_gethdr , 425or initialized by 426.Fn m_dup_pkthdr 427or 428.Fn m_move_pkthdr . 429.It Fn m_align mbuf len 430Serves the same purpose as 431.Fn M_ALIGN 432but handles any type of mbuf. 433.It Fn M_LEADINGSPACE mbuf 434Returns the number of bytes available before the beginning 435of data in 436.Fa mbuf . 437.It Fn M_TRAILINGSPACE mbuf 438Returns the number of bytes available after the end of data in 439.Fa mbuf . 440.It Fn M_PREPEND mbuf len how 441This macro operates on an 442.Vt mbuf chain . 443It is an optimized wrapper for 444.Fn m_prepend 445that can make use of possible empty space before data 446(e.g.\& left after trimming of a link-layer header). 447The new 448.Vt mbuf chain 449pointer or 450.Dv NULL 451is in 452.Fa mbuf 453after the call. 454.It Fn M_MOVE_PKTHDR to from 455Using this macro is equivalent to calling 456.Fn m_move_pkthdr to from . 
457.It Fn M_WRITABLE mbuf 458This macro will evaluate true if 459.Fa mbuf 460is not marked 461.Dv M_RDONLY 462and if either 463.Fa mbuf 464does not contain external storage or, 465if it does, 466then if the reference count of the storage is not greater than 1. 467The 468.Dv M_RDONLY 469flag can be set in 470.Fa mbuf->m_flags . 471This can be achieved during setup of the external storage, 472by passing the 473.Dv M_RDONLY 474bit as a 475.Fa flags 476argument to the 477.Fn MEXTADD 478macro, or can be directly set in individual 479.Vt mbufs . 480.It Fn MCHTYPE mbuf type 481Change the type of 482.Fa mbuf 483to 484.Fa type . 485This is a relatively expensive operation and should be avoided. 486.El 487.Pp 488The functions are: 489.Bl -ohang -offset indent 490.It Fn m_get how type 491A function version of 492.Fn MGET 493for non-critical paths. 494.It Fn m_getm orig len how type 495Allocate 496.Fa len 497bytes worth of 498.Vt mbufs 499and 500.Vt mbuf clusters 501if necessary and append the resulting allocated 502.Vt mbuf chain 503to the 504.Vt mbuf chain 505.Fa orig , 506if it is 507.No non- Ns Dv NULL . 508If the allocation fails at any point, 509free whatever was allocated and return 510.Dv NULL . 511If 512.Fa orig 513is 514.No non- Ns Dv NULL , 515it will not be freed. 516It is possible to use 517.Fn m_getm 518to either append 519.Fa len 520bytes to an existing 521.Vt mbuf 522or 523.Vt mbuf chain 524(for example, one which may be sitting in a pre-allocated ring) 525or to simply perform an all-or-nothing 526.Vt mbuf 527and 528.Vt mbuf cluster 529allocation. 530.It Fn m_gethdr how type 531A function version of 532.Fn MGETHDR 533for non-critical paths. 534.It Fn m_getcl how type flags 535Fetch an 536.Vt mbuf 537with a 538.Vt mbuf cluster 539attached to it. 540If one of the allocations fails, the entire allocation fails. 
541This routine is the preferred way of fetching both the 542.Vt mbuf 543and 544.Vt mbuf cluster 545together, as it avoids having to unlock/relock between allocations. 546Returns 547.Dv NULL 548on failure. 549.It Fn m_getclr how type 550Allocate an 551.Vt mbuf 552and zero out the data region. 553.It Fn m_free mbuf 554Frees 555.Fa mbuf . 556Returns 557.Va m_next 558of the freed 559.Vt mbuf . 560.El 561.Pp 562The functions below operate on 563.Vt mbuf chains . 564.Bl -ohang -offset indent 565.It Fn m_freem mbuf 566Free an entire 567.Vt mbuf chain , 568including any external storage. 569.\" 570.It Fn m_adj mbuf len 571Trim 572.Fa len 573bytes from the head of an 574.Vt mbuf chain 575if 576.Fa len 577is positive, from the tail otherwise. 578.\" 579.It Fn m_append mbuf len cp 580Append 581.Fa len 582bytes of data 583.Fa cp 584to the 585.Vt mbuf chain . 586Extend the mbuf chain if the new data does not fit in 587existing space. 588.\" 589.It Fn m_prepend mbuf len how 590Allocate a new 591.Vt mbuf 592and prepend it to the 593.Vt mbuf chain , 594handling 595.Dv M_PKTHDR 596properly. 597.Sy Note : 598It does not allocate any 599.Vt mbuf clusters , 600so 601.Fa len 602must be less than 603.Dv MLEN 604or 605.Dv MHLEN , 606depending on the 607.Dv M_PKTHDR 608flag setting. 609.\" 610.It Fn m_copyup mbuf len dstoff 611Similar to 612.Fn m_pullup 613but copies 614.Fa len 615bytes of data into a new mbuf at 616.Fa dstoff 617bytes into the mbuf. 618The 619.Fa dstoff 620argument aligns the data and leaves room for a link layer header. 621Returns the new 622.Vt mbuf chain 623on success, 624and frees the 625.Vt mbuf chain 626and returns 627.Dv NULL 628on failure. 629.Sy Note : 630The function does not allocate 631.Vt mbuf clusters , 632so 633.Fa len + dstoff 634must be less than 635.Dv MHLEN . 
636.\" 637.It Fn m_pullup mbuf len 638Arrange that the first 639.Fa len 640bytes of an 641.Vt mbuf chain 642are contiguous and lie in the data area of 643.Fa mbuf , 644so they are accessible with 645.Fn mtod mbuf type . 646Return the new 647.Vt mbuf chain 648on success, 649.Dv NULL 650on failure 651(the 652.Vt mbuf chain 653is freed in this case). 654.Sy Note : 655It does not allocate any 656.Vt mbuf clusters , 657so 658.Fa len 659must be less than 660.Dv MHLEN . 661.\" 662.It Fn m_copym mbuf offset len how 663Make a copy of an 664.Vt mbuf chain 665starting 666.Fa offset 667bytes from the beginning, continuing for 668.Fa len 669bytes. 670If 671.Fa len 672is 673.Dv M_COPYALL , 674copy to the end of the 675.Vt mbuf chain . 676.Sy Note : 677The copy is read-only, because the 678.Vt mbuf clusters 679are not copied, only their reference counts are incremented. 680.\" 681.It Fn m_copypacket mbuf how 682Copy an entire packet including header, which must be present. 683This is an optimized version of the common case 684.Fn m_copym mbuf 0 M_COPYALL how . 685.Sy Note : 686The copy is read-only, because the 687.Vt mbuf clusters 688are not copied, only their reference counts are incremented. 689.\" 690.It Fn m_dup mbuf how 691Copy a packet header 692.Vt mbuf chain 693into a completely new 694.Vt mbuf chain , 695including copying any 696.Vt mbuf clusters . 697Use this instead of 698.Fn m_copypacket 699when you need a writable copy of an 700.Vt mbuf chain . 701.\" 702.It Fn m_copydata mbuf offset len buf 703Copy data from an 704.Vt mbuf chain 705starting 706.Fa offset 707bytes from the beginning, continuing for 708.Fa len 709bytes, into the indicated buffer 710.Fa buf . 711.\" 712.It Fn m_copyback mbuf offset len buf 713Copy 714.Fa len 715bytes from the buffer 716.Fa buf 717back into the indicated 718.Vt mbuf chain , 719starting at 720.Fa offset 721bytes from the beginning of the 722.Vt mbuf chain , 723extending the 724.Vt mbuf chain 725if necessary. 
726.Sy Note : 727It does not allocate any 728.Vt mbuf clusters , 729just adds 730.Vt mbufs 731to the 732.Vt mbuf chain . 733It is safe to set 734.Fa offset 735beyond the current 736.Vt mbuf chain 737end: zeroed 738.Vt mbufs 739will be allocated to fill the space. 740.\" 741.It Fn m_length mbuf last 742Return the length of the 743.Vt mbuf chain , 744and optionally a pointer to the last 745.Vt mbuf . 746.\" 747.It Fn m_dup_pkthdr to from how 748Upon the function's completion, the 749.Vt mbuf 750.Fa to 751will contain an identical copy of 752.Fa from->m_pkthdr 753and the per-packet attributes found in the 754.Vt mbuf chain 755.Fa from . 756The 757.Vt mbuf 758.Fa from 759must have the flag 760.Dv M_PKTHDR 761initially set, and 762.Fa to 763must be empty on entry. 764.\" 765.It Fn m_move_pkthdr to from 766Move 767.Va m_pkthdr 768and the per-packet attributes from the 769.Vt mbuf chain 770.Fa from 771to the 772.Vt mbuf 773.Fa to . 774The 775.Vt mbuf 776.Fa from 777must have the flag 778.Dv M_PKTHDR 779initially set, and 780.Fa to 781must be empty on entry. 782Upon the function's completion, 783.Fa from 784will have the flag 785.Dv M_PKTHDR 786and the per-packet attributes cleared. 787.\" 788.It Fn m_fixhdr mbuf 789Set the packet-header length to the length of the 790.Vt mbuf chain . 791.\" 792.It Fn m_devget buf len offset ifp copy 793Copy data from a device local memory pointed to by 794.Fa buf 795to an 796.Vt mbuf chain . 797The copy is done using a specified copy routine 798.Fa copy , 799or 800.Fn bcopy 801if 802.Fa copy 803is 804.Dv NULL . 805.\" 806.It Fn m_cat m n 807Concatenate 808.Fa n 809to 810.Fa m . 811Both 812.Vt mbuf chains 813must be of the same type. 814.Fa N 815is still valid after the function returned. 816.Sy Note : 817It does not handle 818.Dv M_PKTHDR 819and friends. 820.\" 821.It Fn m_split mbuf len how 822Partition an 823.Vt mbuf chain 824in two pieces, returning the tail: 825all but the first 826.Fa len 827bytes. 
828In case of failure, it returns 829.Dv NULL 830and attempts to restore the 831.Vt mbuf chain 832to its original state. 833.\" 834.It Fn m_apply mbuf off len f arg 835Apply a function to an 836.Vt mbuf chain , 837at offset 838.Fa off , 839for length 840.Fa len 841bytes. 842Typically used to avoid calls to 843.Fn m_pullup 844which would otherwise be unnecessary or undesirable. 845.Fa arg 846is a convenience argument which is passed to the callback function 847.Fa f . 848.Pp 849Each time 850.Fn f 851is called, it will be passed 852.Fa arg , 853a pointer to the 854.Fa data 855in the current mbuf, and the length 856.Fa len 857of the data in this mbuf to which the function should be applied. 858.Pp 859The function should return zero to indicate success; 860otherwise, if an error is indicated, then 861.Fn m_apply 862will return the error and stop iterating through the 863.Vt mbuf chain . 864.\" 865.It Fn m_getptr mbuf loc off 866Return a pointer to the mbuf containing the data located at 867.Fa loc 868bytes from the beginning of the 869.Vt mbuf chain . 870The corresponding offset into the mbuf will be stored in 871.Fa *off . 872.It Fn m_defrag m0 how 873Defragment an mbuf chain, returning the shortest possible 874chain of mbufs and clusters. 875If allocation fails and this can not be completed, 876.Dv NULL 877will be returned and the original chain will be unchanged. 878Upon success, the original chain will be freed and the new 879chain will be returned. 880.Fa how 881should be either 882.Dv M_TRYWAIT 883or 884.Dv M_DONTWAIT , 885depending on the caller's preference. 886.Pp 887This function is especially useful in network drivers, where 888certain long mbuf chains must be shortened before being added 889to TX descriptor lists. 890.It Fn m_unshare m0 how 891Create a version of the specified mbuf chain whose 892contents can be safely modified without affecting other users. 893If allocation fails and this operation can not be completed, 894.Dv NULL 895will be returned. 
896The original mbuf chain is always reclaimed and the reference 897count of any shared mbuf clusters is decremented. 898.Fa how 899should be either 900.Dv M_TRYWAIT 901or 902.Dv M_DONTWAIT , 903depending on the caller's preference. 904As a side-effect of this process the returned 905mbuf chain may be compacted. 906.Pp 907This function is especially useful in the transmit path of 908network code, when data must be encrypted or otherwise 909altered prior to transmission. 910.El 911.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 912This section currently applies to TCP/IP only. 913In order to save the host CPU resources, computing checksums is 914offloaded to the network interface hardware if possible. 915The 916.Va m_pkthdr 917member of the leading 918.Vt mbuf 919of a packet contains two fields used for that purpose, 920.Vt int Va csum_flags 921and 922.Vt int Va csum_data . 923The meaning of those fields depends on the direction a packet flows in, 924and on whether the packet is fragmented. 925Henceforth, 926.Va csum_flags 927or 928.Va csum_data 929of a packet 930will denote the corresponding field of the 931.Va m_pkthdr 932member of the leading 933.Vt mbuf 934in the 935.Vt mbuf chain 936containing the packet. 937.Pp 938On output, checksum offloading is attempted after the outgoing 939interface has been determined for a packet. 940The interface-specific field 941.Va ifnet.if_data.ifi_hwassist 942(see 943.Xr ifnet 9 ) 944is consulted for the capabilities of the interface to assist in 945computing checksums. 946The 947.Va csum_flags 948field of the packet header is set to indicate which actions the interface 949is supposed to perform on it. 950The actions unsupported by the network interface are done in the 951software prior to passing the packet down to the interface driver; 952such actions will never be requested through 953.Va csum_flags . 
954.Pp 955The flags demanding a particular action from an interface are as follows: 956.Bl -tag -width ".Dv CSUM_TCP" -offset indent 957.It Dv CSUM_IP 958The IP header checksum is to be computed and stored in the 959corresponding field of the packet. 960The hardware is expected to know the format of an IP header 961to determine the offset of the IP checksum field. 962.It Dv CSUM_TCP 963The TCP checksum is to be computed. 964(See below.) 965.It Dv CSUM_UDP 966The UDP checksum is to be computed. 967(See below.) 968.El 969.Pp 970Should a TCP or UDP checksum be offloaded to the hardware, 971the field 972.Va csum_data 973will contain the byte offset of the checksum field relative to the 974end of the IP header. 975In this case, the checksum field will be initially 976set by the TCP/IP module to the checksum of the pseudo header 977defined by the TCP and UDP specifications. 978.Pp 979For outbound packets which have been fragmented 980by the host CPU, the following will also be true, 981regardless of the checksum flag settings: 982.Bl -bullet -offset indent 983.It 984all fragments will have the flag 985.Dv M_FRAG 986set in their 987.Va m_flags 988field; 989.It 990the first and the last fragments in the chain will have 991.Dv M_FIRSTFRAG 992or 993.Dv M_LASTFRAG 994set in their 995.Va m_flags , 996correspondingly; 997.It 998the first fragment in the chain will have the total number 999of fragments contained in its 1000.Va csum_data 1001field. 1002.El 1003.Pp 1004The last rule for fragmented packets takes precedence over the one 1005for a TCP or UDP checksum. 1006Nevertheless, offloading a TCP or UDP checksum is possible for a 1007fragmented packet if the flag 1008.Dv CSUM_IP_FRAGS 1009is set in the field 1010.Va ifnet.if_data.ifi_hwassist 1011associated with the network interface. 
1012However, in this case the interface is expected to figure out 1013the location of the checksum field within the sequence of fragments 1014by itself because 1015.Va csum_data 1016contains a fragment count instead of a checksum offset value. 1017.Pp 1018On input, an interface indicates the actions it has performed 1019on a packet by setting one or more of the following flags in 1020.Va csum_flags 1021associated with the packet: 1022.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1023.It Dv CSUM_IP_CHECKED 1024The IP header checksum has been computed. 1025.It Dv CSUM_IP_VALID 1026The IP header has a valid checksum. 1027This flag can appear only in combination with 1028.Dv CSUM_IP_CHECKED . 1029.It Dv CSUM_DATA_VALID 1030The checksum of the data portion of the IP packet has been computed 1031and stored in the field 1032.Va csum_data 1033in network byte order. 1034.It Dv CSUM_PSEUDO_HDR 1035Can be set only along with 1036.Dv CSUM_DATA_VALID 1037to indicate that the IP data checksum found in 1038.Va csum_data 1039allows for the pseudo header defined by the TCP and UDP specifications. 1040Otherwise the checksum of the pseudo header must be calculated by 1041the host CPU and added to 1042.Va csum_data 1043to obtain the final checksum to be used for TCP or UDP validation purposes. 1044.El 1045.Pp 1046If a particular network interface just indicates success or 1047failure of TCP or UDP checksum validation without returning 1048the exact value of the checksum to the host CPU, its driver can mark 1049.Dv CSUM_DATA_VALID 1050and 1051.Dv CSUM_PSEUDO_HDR 1052in 1053.Va csum_flags , 1054and set 1055.Va csum_data 1056to 1057.Li 0xFFFF 1058hexadecimal to indicate a valid checksum. 1059It is a peculiarity of the algorithm used that the Internet checksum 1060calculated over any valid packet will be 1061.Li 0xFFFF 1062as long as the original checksum field is included. 
1063.Pp 1064For inbound packets which are IP fragments, all 1065.Va csum_data 1066fields will be summed during reassembly to obtain the final checksum 1067value passed to an upper layer in the 1068.Va csum_data 1069field of the reassembled packet. 1070The 1071.Va csum_flags 1072fields of all fragments will be consolidated using logical AND 1073to obtain the final value for 1074.Va csum_flags . 1075Thus, in order to successfully 1076offload checksum computation for fragmented data, 1077all fragments should have the same value of 1078.Va csum_flags . 1079.Sh STRESS TESTING 1080When running a kernel compiled with the option 1081.Dv MBUF_STRESS_TEST , 1082the following 1083.Xr sysctl 8 Ns 1084-controlled options may be used to create 1085various failure/extreme cases for testing of network drivers 1086and other parts of the kernel that rely on 1087.Vt mbufs . 1088.Bl -tag -width ident 1089.It Va net.inet.ip.mbuf_frag_size 1090Causes 1091.Fn ip_output 1092to fragment outgoing 1093.Vt mbuf chains 1094into fragments of the specified size. 1095Setting this variable to 1 is an excellent way to 1096test the long 1097.Vt mbuf chain 1098handling ability of network drivers. 1099.It Va kern.ipc.m_defragrandomfailures 1100Causes the function 1101.Fn m_defrag 1102to randomly fail, returning 1103.Dv NULL . 1104Any piece of code which uses 1105.Fn m_defrag 1106should be tested with this feature. 1107.El 1108.Sh RETURN VALUES 1109See above. 1110.Sh SEE ALSO 1111.Xr ifnet 9 , 1112.Xr mbuf_tags 9 1113.Sh HISTORY 1114.\" Please correct me if I'm wrong 1115.Vt Mbufs 1116appeared in an early version of 1117.Bx . 1118Besides being used for network packets, they were used 1119to store various dynamic structures, such as routing table 1120entries, interface addresses, protocol control blocks, etc. 1121.Sh AUTHORS 1122The original 1123.Nm 1124manual page was written by Yar Tikhiy. 1125