1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd July 24, 2006 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 55.\" 56.Ss Mbuf utility macros 57.Fn mtod "struct mbuf *mbuf" "type" 58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 60.Ft int 61.Fn M_LEADINGSPACE "struct mbuf *mbuf" 62.Ft int 63.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 67.Ft int 68.Fn M_WRITABLE "struct mbuf *mbuf" 69.\" 70.Ss Mbuf allocation functions 71.Ft struct mbuf * 72.Fn m_get "int how" "int type" 73.Ft struct mbuf * 74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 75.Ft struct mbuf * 76.Fn m_getcl "int how" "short type" "int flags" 77.Ft struct mbuf * 78.Fn m_getclr "int how" "int type" 79.Ft struct mbuf * 80.Fn m_gethdr "int how" "int type" 81.Ft struct mbuf * 82.Fn m_free "struct mbuf *mbuf" 83.Ft void 84.Fn m_freem "struct mbuf *mbuf" 85.\" 86.Ss Mbuf utility functions 87.Ft void 88.Fn m_adj "struct mbuf *mbuf" "int len" 89.Ft void 90.Fn m_align "struct mbuf *mbuf" "int len" 91.Ft int 92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff" 97.Ft struct mbuf 
* 98.Fn m_pullup "struct mbuf *mbuf" "int len" 99.Ft struct mbuf * 100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 101.Ft struct mbuf * 102.Fn m_copypacket "struct mbuf *mbuf" "int how" 103.Ft struct mbuf * 104.Fn m_dup "struct mbuf *mbuf" "int how" 105.Ft void 106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft void 108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 109.Ft struct mbuf * 110.Fo m_devget 111.Fa "char *buf" 112.Fa "int len" 113.Fa "int offset" 114.Fa "struct ifnet *ifp" 115.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 116.Fc 117.Ft void 118.Fn m_cat "struct mbuf *m" "struct mbuf *n" 119.Ft u_int 120.Fn m_fixhdr "struct mbuf *mbuf" 121.Ft int 122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 123.Ft void 124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 125.Ft u_int 126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 127.Ft struct mbuf * 128.Fn m_split "struct mbuf *mbuf" "int len" "int how" 129.Ft int 130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 131.Ft struct mbuf * 132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 133.Ft struct mbuf * 134.Fn m_defrag "struct mbuf *m0" "int how" 135.Ft struct mbuf * 136.Fn m_unshare "struct mbuf *m0" "int how" 137.\" 138.Sh DESCRIPTION 139An 140.Vt mbuf 141is a basic unit of memory management in the kernel IPC subsystem. 142Network packets and socket buffers are stored in 143.Vt mbufs . 144A network packet may span multiple 145.Vt mbufs 146arranged into a 147.Vt mbuf chain 148(linked list), 149which allows adding or trimming 150network headers with little overhead. 151.Pp 152While a developer should not bother with 153.Vt mbuf 154internals without serious 155reason in order to avoid incompatibilities with future changes, it 156is useful to understand the general structure of an 157.Vt mbuf . 
158.Pp 159An 160.Vt mbuf 161consists of a variable-sized header and a small internal 162buffer for data. 163The total size of an 164.Vt mbuf , 165.Dv MSIZE , 166is a constant defined in 167.In sys/param.h . 168The 169.Vt mbuf 170header includes: 171.Pp 172.Bl -tag -width "m_nextpkt" -offset indent 173.It Va m_next 174.Pq Vt struct mbuf * 175A pointer to the next 176.Vt mbuf 177in the 178.Vt mbuf chain . 179.It Va m_nextpkt 180.Pq Vt struct mbuf * 181A pointer to the next 182.Vt mbuf chain 183in the queue. 184.It Va m_data 185.Pq Vt caddr_t 186A pointer to data attached to this 187.Vt mbuf . 188.It Va m_len 189.Pq Vt int 190The length of the data. 191.It Va m_type 192.Pq Vt short 193The type of the data. 194.It Va m_flags 195.Pq Vt int 196The 197.Vt mbuf 198flags. 199.El 200.Pp 201The 202.Vt mbuf 203flag bits are defined as follows: 204.Bd -literal 205/* mbuf flags */ 206#define M_EXT 0x0001 /* has associated external storage */ 207#define M_PKTHDR 0x0002 /* start of record */ 208#define M_EOR 0x0004 /* end of record */ 209#define M_RDONLY 0x0008 /* associated data marked read-only */ 210#define M_PROTO1 0x0010 /* protocol-specific */ 211#define M_PROTO2 0x0020 /* protocol-specific */ 212#define M_PROTO3 0x0040 /* protocol-specific */ 213#define M_PROTO4 0x0080 /* protocol-specific */ 214#define M_PROTO5 0x0100 /* protocol-specific */ 215#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 216#define M_FREELIST 0x8000 /* mbuf is on the free list */ 217 218/* mbuf pkthdr flags (also stored in m_flags) */ 219#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 220#define M_MCAST 0x0400 /* send/received as link-level multicast */ 221#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 222#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 223#define M_LASTFRAG 0x2000 /* packet is last fragment */ 224.Ed 225.Pp 226The available 227.Vt mbuf 228types are defined as follows: 229.Bd -literal 230/* mbuf types */ 231#define 
MT_DATA 1 /* dynamic (data) allocation */ 232#define MT_HEADER MT_DATA /* packet header */ 233#define MT_SONAME 8 /* socket name */ 234#define MT_CONTROL 14 /* extra-data protocol message */ 235#define MT_OOBDATA 15 /* expedited data */ 236.Ed 237.Pp 238If the 239.Dv M_PKTHDR 240flag is set, a 241.Vt struct pkthdr Va m_pkthdr 242is added to the 243.Vt mbuf 244header. 245It contains a pointer to the interface 246the packet has been received from 247.Pq Vt struct ifnet Va *rcvif , 248and the total packet length 249.Pq Vt int Va len . 250Optionally, it may also contain an attached list of packet tags 251.Pq Vt "struct m_tag" . 252See 253.Xr mbuf_tags 9 254for details. 255Fields used in offloading checksum calculation to the hardware are kept in 256.Va m_pkthdr 257as well. 258See 259.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 260for details. 261.Pp 262If small enough, data is stored in the internal data buffer of an 263.Vt mbuf . 264If the data is sufficiently large, another 265.Vt mbuf 266may be added to the 267.Vt mbuf chain , 268or external storage may be associated with the 269.Vt mbuf . 270.Dv MHLEN 271bytes of data can fit into an 272.Vt mbuf 273with the 274.Dv M_PKTHDR 275flag set, 276.Dv MLEN 277bytes can otherwise. 278.Pp 279If external storage is being associated with an 280.Vt mbuf , 281the 282.Va m_ext 283header is added at the cost of losing the internal data buffer. 284It includes a pointer to external storage, the size of the storage, 285a pointer to a function used for freeing the storage, 286a pointer to an optional argument that can be passed to the function, 287and a pointer to a reference counter. 288An 289.Vt mbuf 290using external storage has the 291.Dv M_EXT 292flag set. 293.Pp 294The system supplies a macro for allocating the desired external storage 295buffer, 296.Dv MEXTADD . 297.Pp 298The allocation and management of the reference counter is handled by the 299subsystem. 
300.Pp 301The system also supplies a default type of external storage buffer called an 302.Vt mbuf cluster . 303.Vt Mbuf clusters 304can be allocated and configured with the use of the 305.Dv MCLGET 306macro. 307Each 308.Vt mbuf cluster 309is 310.Dv MCLBYTES 311in size, where MCLBYTES is a machine-dependent constant. 312The system defines an advisory macro 313.Dv MINCLSIZE , 314which is the smallest amount of data to put into an 315.Vt mbuf cluster . 316It is equal to the sum of 317.Dv MLEN 318and 319.Dv MHLEN . 320It is typically preferable to store data into the data region of an 321.Vt mbuf , 322if size permits, as opposed to allocating a separate 323.Vt mbuf cluster 324to hold the same data. 325.\" 326.Ss Macros and Functions 327There are numerous predefined macros and functions that provide the 328developer with common utilities. 329.\" 330.Bl -ohang -offset indent 331.It Fn mtod mbuf type 332Convert an 333.Fa mbuf 334pointer to a data pointer. 335The macro expands to the data pointer cast to the pointer of the specified 336.Fa type . 337.Sy Note : 338It is advisable to ensure that there is enough contiguous data in 339.Fa mbuf . 340See 341.Fn m_pullup 342for details. 343.It Fn MGET mbuf how type 344Allocate an 345.Vt mbuf 346and initialize it to contain internal data. 347.Fa mbuf 348will point to the allocated 349.Vt mbuf 350on success, or be set to 351.Dv NULL 352on failure. 353The 354.Fa how 355argument is to be set to 356.Dv M_TRYWAIT 357or 358.Dv M_DONTWAIT . 359It specifies whether the caller is willing to block if necessary. 360If 361.Fa how 362is set to 363.Dv M_TRYWAIT , 364a failed allocation will result in the caller being put 365to sleep for a designated 366kern.ipc.mbuf_wait 367.Xr ( sysctl 8 368tunable) 369number of ticks. 370A number of other functions and macros related to 371.Vt mbufs 372have the same argument because they may 373at some point need to allocate new 374.Vt mbufs . 
375.Pp 376Programmers should be careful not to confuse the 377.Vt mbuf 378allocation flag 379.Dv M_DONTWAIT 380with the 381.Xr malloc 9 382allocation flag, 383.Dv M_NOWAIT . 384They are not the same. 385.It Fn MGETHDR mbuf how type 386Allocate an 387.Vt mbuf 388and initialize it to contain a packet header 389and internal data. 390See 391.Fn MGET 392for details. 393.It Fn MCLGET mbuf how 394Allocate and attach an 395.Vt mbuf cluster 396to 397.Fa mbuf . 398If the macro fails, the 399.Dv M_EXT 400flag will not be set in 401.Fa mbuf . 402.It Fn M_ALIGN mbuf len 403Set the pointer 404.Fa mbuf->m_data 405to place an object of the size 406.Fa len 407at the end of the internal data area of 408.Fa mbuf , 409long word aligned. 410Applicable only if 411.Fa mbuf 412is newly allocated with 413.Fn MGET 414or 415.Fn m_get . 416.It Fn MH_ALIGN mbuf len 417Serves the same purpose as 418.Fn M_ALIGN 419does, but only for 420.Fa mbuf 421newly allocated with 422.Fn MGETHDR 423or 424.Fn m_gethdr , 425or initialized by 426.Fn m_dup_pkthdr 427or 428.Fn m_move_pkthdr . 429.It Fn m_align mbuf len 430Serves the same purpose as 431.Fn M_ALIGN 432but handles any type of mbuf. 433.It Fn M_LEADINGSPACE mbuf 434Returns the number of bytes available before the beginning 435of data in 436.Fa mbuf . 437.It Fn M_TRAILINGSPACE mbuf 438Returns the number of bytes available after the end of data in 439.Fa mbuf . 440.It Fn M_PREPEND mbuf len how 441This macro operates on an 442.Vt mbuf chain . 443It is an optimized wrapper for 444.Fn m_prepend 445that can make use of possible empty space before data 446(e.g.\& left after trimming of a link-layer header). 447The new 448.Vt mbuf chain 449pointer or 450.Dv NULL 451is in 452.Fa mbuf 453after the call. 454.It Fn M_MOVE_PKTHDR to from 455Using this macro is equivalent to calling 456.Fn m_move_pkthdr to from . 
457.It Fn M_WRITABLE mbuf 458This macro will evaluate true if 459.Fa mbuf 460is not marked 461.Dv M_RDONLY 462and if either 463.Fa mbuf 464does not contain external storage or, 465if it does, 466then if the reference count of the storage is not greater than 1. 467The 468.Dv M_RDONLY 469flag can be set in 470.Fa mbuf->m_flags . 471This can be achieved during setup of the external storage, 472by passing the 473.Dv M_RDONLY 474bit as a 475.Fa flags 476argument to the 477.Fn MEXTADD 478macro, or can be directly set in individual 479.Vt mbufs . 480.It Fn MCHTYPE mbuf type 481Change the type of 482.Fa mbuf 483to 484.Fa type . 485This is a relatively expensive operation and should be avoided. 486.El 487.Pp 488The functions are: 489.Bl -ohang -offset indent 490.It Fn m_get how type 491A function version of 492.Fn MGET 493for non-critical paths. 494.It Fn m_getm orig len how type 495Allocate 496.Fa len 497bytes worth of 498.Vt mbufs 499and 500.Vt mbuf clusters 501if necessary and append the resulting allocated 502.Vt mbuf chain 503to the 504.Vt mbuf chain 505.Fa orig , 506if it is 507.No non- Ns Dv NULL . 508If the allocation fails at any point, 509free whatever was allocated and return 510.Dv NULL . 511If 512.Fa orig 513is 514.No non- Ns Dv NULL , 515it will not be freed. 516It is possible to use 517.Fn m_getm 518to either append 519.Fa len 520bytes to an existing 521.Vt mbuf 522or 523.Vt mbuf chain 524(for example, one which may be sitting in a pre-allocated ring) 525or to simply perform an all-or-nothing 526.Vt mbuf 527and 528.Vt mbuf cluster 529allocation. 530.It Fn m_gethdr how type 531A function version of 532.Fn MGETHDR 533for non-critical paths. 534.It Fn m_getcl how type flags 535Fetch an 536.Vt mbuf 537with a 538.Vt mbuf cluster 539attached to it. 540If one of the allocations fails, the entire allocation fails. 
541This routine is the preferred way of fetching both the 542.Vt mbuf 543and 544.Vt mbuf cluster 545together, as it avoids having to unlock/relock between allocations. 546Returns 547.Dv NULL 548on failure. 549.It Fn m_getclr how type 550Allocate an 551.Vt mbuf 552and zero out the data region. 553.It Fn m_free mbuf 554Frees 555.Vt mbuf . 556Returns 557.Va m_next 558of the freed 559.Vt mbuf . 560.El 561.Pp 562The functions below operate on 563.Vt mbuf chains . 564.Bl -ohang -offset indent 565.It Fn m_freem mbuf 566Free an entire 567.Vt mbuf chain , 568including any external storage. 569.\" 570.It Fn m_adj mbuf len 571Trim 572.Fa len 573bytes from the head of an 574.Vt mbuf chain 575if 576.Fa len 577is positive, from the tail otherwise. 578.\" 579.It Fn m_append mbuf len cp 580Append 581.Fa len 582bytes of data 583.Fa cp 584to the 585.Vt mbuf chain . 586Extend the mbuf chain if the new data does not fit in 587existing space. 588.\" 589.It Fn m_prepend mbuf len how 590Allocate a new 591.Vt mbuf 592and prepend it to the 593.Vt mbuf chain , 594handle 595.Dv M_PKTHDR 596properly. 597.Sy Note : 598It does not allocate any 599.Vt mbuf clusters , 600so 601.Fa len 602must be less than 603.Dv MLEN 604or 605.Dv MHLEN , 606depending on the 607.Dv M_PKTHDR 608flag setting. 609.\" 610.It Fn m_copyup mbuf len dstoff 611Similar to 612.Fn m_pullup 613but copies 614.Fa len 615bytes of data into a new mbuf at 616.Fa dstoff 617bytes into the mbuf. 618The 619.Fa dstoff 620argument aligns the data and leaves room for a link layer header. 621Returns the new 622.Vt mbuf chain 623on success, 624and frees the 625.Vt mbuf chain 626and returns 627.Dv NULL 628on failure. 629.Sy Note : 630The function does not allocate 631.Vt mbuf clusters , 632so 633.Fa len + dstoff 634must be less than 635.Dv MHLEN . 
636.\" 637.It Fn m_pullup mbuf len 638Arrange that the first 639.Fa len 640bytes of an 641.Vt mbuf chain 642are contiguous and lie in the data area of 643.Fa mbuf , 644so they are accessible with 645.Fn mtod mbuf type . 646It is important to remember that this may involve 647reallocating some mbufs and moving data so all pointers 648referencing data within the old mbuf chain 649must be recalculated or made invalid. 650Return the new 651.Vt mbuf chain 652on success, 653.Dv NULL 654on failure 655(the 656.Vt mbuf chain 657is freed in this case). 658.Sy Note : 659It does not allocate any 660.Vt mbuf clusters , 661so 662.Fa len 663must be less than 664.Dv MHLEN . 665.\" 666.It Fn m_copym mbuf offset len how 667Make a copy of an 668.Vt mbuf chain 669starting 670.Fa offset 671bytes from the beginning, continuing for 672.Fa len 673bytes. 674If 675.Fa len 676is 677.Dv M_COPYALL , 678copy to the end of the 679.Vt mbuf chain . 680.Sy Note : 681The copy is read-only, because the 682.Vt mbuf clusters 683are not copied, only their reference counts are incremented. 684.\" 685.It Fn m_copypacket mbuf how 686Copy an entire packet including header, which must be present. 687This is an optimized version of the common case 688.Fn m_copym mbuf 0 M_COPYALL how . 689.Sy Note : 690the copy is read-only, because the 691.Vt mbuf clusters 692are not copied, only their reference counts are incremented. 693.\" 694.It Fn m_dup mbuf how 695Copy a packet header 696.Vt mbuf chain 697into a completely new 698.Vt mbuf chain , 699including copying any 700.Vt mbuf clusters . 701Use this instead of 702.Fn m_copypacket 703when you need a writable copy of an 704.Vt mbuf chain . 705.\" 706.It Fn m_copydata mbuf offset len buf 707Copy data from an 708.Vt mbuf chain 709starting 710.Fa offset 711bytes from the beginning, continuing for 712.Fa len 713bytes, into the indicated buffer 714.Fa buf . 
715.\" 716.It Fn m_copyback mbuf offset len buf 717Copy 718.Fa len 719bytes from the buffer 720.Fa buf 721back into the indicated 722.Vt mbuf chain , 723starting at 724.Fa offset 725bytes from the beginning of the 726.Vt mbuf chain , 727extending the 728.Vt mbuf chain 729if necessary. 730.Sy Note : 731It does not allocate any 732.Vt mbuf clusters , 733just adds 734.Vt mbufs 735to the 736.Vt mbuf chain . 737It is safe to set 738.Fa offset 739beyond the current 740.Vt mbuf chain 741end: zeroed 742.Vt mbufs 743will be allocated to fill the space. 744.\" 745.It Fn m_length mbuf last 746Return the length of the 747.Vt mbuf chain , 748and optionally a pointer to the last 749.Vt mbuf . 750.\" 751.It Fn m_dup_pkthdr to from how 752Upon the function's completion, the 753.Vt mbuf 754.Fa to 755will contain an identical copy of 756.Fa from->m_pkthdr 757and the per-packet attributes found in the 758.Vt mbuf chain 759.Fa from . 760The 761.Vt mbuf 762.Fa from 763must have the flag 764.Dv M_PKTHDR 765initially set, and 766.Fa to 767must be empty on entry. 768.\" 769.It Fn m_move_pkthdr to from 770Move 771.Va m_pkthdr 772and the per-packet attributes from the 773.Vt mbuf chain 774.Fa from 775to the 776.Vt mbuf 777.Fa to . 778The 779.Vt mbuf 780.Fa from 781must have the flag 782.Dv M_PKTHDR 783initially set, and 784.Fa to 785must be empty on entry. 786Upon the function's completion, 787.Fa from 788will have the flag 789.Dv M_PKTHDR 790and the per-packet attributes cleared. 791.\" 792.It Fn m_fixhdr mbuf 793Set the packet-header length to the length of the 794.Vt mbuf chain . 795.\" 796.It Fn m_devget buf len offset ifp copy 797Copy data from a device local memory pointed to by 798.Fa buf 799to an 800.Vt mbuf chain . 801The copy is done using a specified copy routine 802.Fa copy , 803or 804.Fn bcopy 805if 806.Fa copy 807is 808.Dv NULL . 809.\" 810.It Fn m_cat m n 811Concatenate 812.Fa n 813to 814.Fa m . 815Both 816.Vt mbuf chains 817must be of the same type. 
818.Fa n 819is still valid after the function returns. 820.Sy Note : 821It does not handle 822.Dv M_PKTHDR 823and friends. 824.\" 825.It Fn m_split mbuf len how 826Partition an 827.Vt mbuf chain 828in two pieces, returning the tail: 829all but the first 830.Fa len 831bytes. 832In case of failure, it returns 833.Dv NULL 834and attempts to restore the 835.Vt mbuf chain 836to its original state. 837.\" 838.It Fn m_apply mbuf off len f arg 839Apply a function to an 840.Vt mbuf chain , 841at offset 842.Fa off , 843for length 844.Fa len 845bytes. 846Typically used to avoid calls to 847.Fn m_pullup 848which would otherwise be unnecessary or undesirable. 849.Fa arg 850is a convenience argument which is passed to the callback function 851.Fa f . 852.Pp 853Each time 854.Fn f 855is called, it will be passed 856.Fa arg , 857a pointer to the 858.Fa data 859in the current mbuf, and the length 860.Fa len 861of the data in this mbuf to which the function should be applied. 862.Pp 863The function should return zero to indicate success; 864otherwise, if an error is indicated, then 865.Fn m_apply 866will return the error and stop iterating through the 867.Vt mbuf chain . 868.\" 869.It Fn m_getptr mbuf loc off 870Return a pointer to the mbuf containing the data located at 871.Fa loc 872bytes from the beginning of the 873.Vt mbuf chain . 874The corresponding offset into the mbuf will be stored in 875.Fa *off . 876.It Fn m_defrag m0 how 877Defragment an mbuf chain, returning the shortest possible 878chain of mbufs and clusters. 879If allocation fails and this can not be completed, 880.Dv NULL 881will be returned and the original chain will be unchanged. 882Upon success, the original chain will be freed and the new 883chain will be returned. 884.Fa how 885should be either 886.Dv M_TRYWAIT 887or 888.Dv M_DONTWAIT , 889depending on the caller's preference. 
890.Pp 891This function is especially useful in network drivers, where 892certain long mbuf chains must be shortened before being added 893to TX descriptor lists. 894.It Fn m_unshare m0 how 895Create a version of the specified mbuf chain whose 896contents can be safely modified without affecting other users. 897If allocation fails and this operation can not be completed, 898.Dv NULL 899will be returned. 900The original mbuf chain is always reclaimed and the reference 901count of any shared mbuf clusters is decremented. 902.Fa how 903should be either 904.Dv M_TRYWAIT 905or 906.Dv M_DONTWAIT , 907depending on the caller's preference. 908As a side-effect of this process the returned 909mbuf chain may be compacted. 910.Pp 911This function is especially useful in the transmit path of 912network code, when data must be encrypted or otherwise 913altered prior to transmission. 914.El 915.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 916This section currently applies to TCP/IP only. 917In order to save the host CPU resources, computing checksums is 918offloaded to the network interface hardware if possible. 919The 920.Va m_pkthdr 921member of the leading 922.Vt mbuf 923of a packet contains two fields used for that purpose, 924.Vt int Va csum_flags 925and 926.Vt int Va csum_data . 927The meaning of those fields depends on the direction a packet flows in, 928and on whether the packet is fragmented. 929Henceforth, 930.Va csum_flags 931or 932.Va csum_data 933of a packet 934will denote the corresponding field of the 935.Va m_pkthdr 936member of the leading 937.Vt mbuf 938in the 939.Vt mbuf chain 940containing the packet. 941.Pp 942On output, checksum offloading is attempted after the outgoing 943interface has been determined for a packet. 944The interface-specific field 945.Va ifnet.if_data.ifi_hwassist 946(see 947.Xr ifnet 9 ) 948is consulted for the capabilities of the interface to assist in 949computing checksums. 
950The 951.Va csum_flags 952field of the packet header is set to indicate which actions the interface 953is supposed to perform on it. 954The actions unsupported by the network interface are done in the 955software prior to passing the packet down to the interface driver; 956such actions will never be requested through 957.Va csum_flags . 958.Pp 959The flags demanding a particular action from an interface are as follows: 960.Bl -tag -width ".Dv CSUM_TCP" -offset indent 961.It Dv CSUM_IP 962The IP header checksum is to be computed and stored in the 963corresponding field of the packet. 964The hardware is expected to know the format of an IP header 965to determine the offset of the IP checksum field. 966.It Dv CSUM_TCP 967The TCP checksum is to be computed. 968(See below.) 969.It Dv CSUM_UDP 970The UDP checksum is to be computed. 971(See below.) 972.El 973.Pp 974Should a TCP or UDP checksum be offloaded to the hardware, 975the field 976.Va csum_data 977will contain the byte offset of the checksum field relative to the 978end of the IP header. 979In this case, the checksum field will be initially 980set by the TCP/IP module to the checksum of the pseudo header 981defined by the TCP and UDP specifications. 982.Pp 983For outbound packets which have been fragmented 984by the host CPU, the following will also be true, 985regardless of the checksum flag settings: 986.Bl -bullet -offset indent 987.It 988all fragments will have the flag 989.Dv M_FRAG 990set in their 991.Va m_flags 992field; 993.It 994the first and the last fragments in the chain will have 995.Dv M_FIRSTFRAG 996or 997.Dv M_LASTFRAG 998set in their 999.Va m_flags , 1000correspondingly; 1001.It 1002the first fragment in the chain will have the total number 1003of fragments contained in its 1004.Va csum_data 1005field. 1006.El 1007.Pp 1008The last rule for fragmented packets takes precedence over the one 1009for a TCP or UDP checksum. 
1010Nevertheless, offloading a TCP or UDP checksum is possible for a 1011fragmented packet if the flag 1012.Dv CSUM_IP_FRAGS 1013is set in the field 1014.Va ifnet.if_data.ifi_hwassist 1015associated with the network interface. 1016However, in this case the interface is expected to figure out 1017the location of the checksum field within the sequence of fragments 1018by itself because 1019.Va csum_data 1020contains a fragment count instead of a checksum offset value. 1021.Pp 1022On input, an interface indicates the actions it has performed 1023on a packet by setting one or more of the following flags in 1024.Va csum_flags 1025associated with the packet: 1026.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 1027.It Dv CSUM_IP_CHECKED 1028The IP header checksum has been computed. 1029.It Dv CSUM_IP_VALID 1030The IP header has a valid checksum. 1031This flag can appear only in combination with 1032.Dv CSUM_IP_CHECKED . 1033.It Dv CSUM_DATA_VALID 1034The checksum of the data portion of the IP packet has been computed 1035and stored in the field 1036.Va csum_data 1037in network byte order. 1038.It Dv CSUM_PSEUDO_HDR 1039Can be set only along with 1040.Dv CSUM_DATA_VALID 1041to indicate that the IP data checksum found in 1042.Va csum_data 1043allows for the pseudo header defined by the TCP and UDP specifications. 1044Otherwise the checksum of the pseudo header must be calculated by 1045the host CPU and added to 1046.Va csum_data 1047to obtain the final checksum to be used for TCP or UDP validation purposes. 1048.El 1049.Pp 1050If a particular network interface just indicates success or 1051failure of TCP or UDP checksum validation without returning 1052the exact value of the checksum to the host CPU, its driver can mark 1053.Dv CSUM_DATA_VALID 1054and 1055.Dv CSUM_PSEUDO_HDR 1056in 1057.Va csum_flags , 1058and set 1059.Va csum_data 1060to 1061.Li 0xFFFF 1062hexadecimal to indicate a valid checksum. 
1063It is a peculiarity of the algorithm used that the Internet checksum 1064calculated over any valid packet will be 1065.Li 0xFFFF 1066as long as the original checksum field is included. 1067.Pp 1068For inbound packets which are IP fragments, all 1069.Va csum_data 1070fields will be summed during reassembly to obtain the final checksum 1071value passed to an upper layer in the 1072.Va csum_data 1073field of the reassembled packet. 1074The 1075.Va csum_flags 1076fields of all fragments will be consolidated using logical AND 1077to obtain the final value for 1078.Va csum_flags . 1079Thus, in order to successfully 1080offload checksum computation for fragmented data, 1081all fragments should have the same value of 1082.Va csum_flags . 1083.Sh STRESS TESTING 1084When running a kernel compiled with the option 1085.Dv MBUF_STRESS_TEST , 1086the following 1087.Xr sysctl 8 Ns 1088-controlled options may be used to create 1089various failure/extreme cases for testing of network drivers 1090and other parts of the kernel that rely on 1091.Vt mbufs . 1092.Bl -tag -width indent 1093.It Va net.inet.ip.mbuf_frag_size 1094Causes 1095.Fn ip_output 1096to fragment outgoing 1097.Vt mbuf chains 1098into fragments of the specified size. 1099Setting this variable to 1 is an excellent way to 1100test the long 1101.Vt mbuf chain 1102handling ability of network drivers. 1103.It Va kern.ipc.m_defragrandomfailures 1104Causes the function 1105.Fn m_defrag 1106to randomly fail, returning 1107.Dv NULL . 1108Any piece of code which uses 1109.Fn m_defrag 1110should be tested with this feature. 1111.El 1112.Sh RETURN VALUES 1113See above. 1114.Sh SEE ALSO 1115.Xr ifnet 9 , 1116.Xr mbuf_tags 9 1117.Sh HISTORY 1118.\" Please correct me if I'm wrong 1119.Vt Mbufs 1120appeared in an early version of 1121.Bx . 1122Besides being used for network packets, they were used 1123to store various dynamic structures, such as routing table 1124entries, interface addresses, protocol control blocks, etc. 
1125.Sh AUTHORS 1126The original 1127.Nm 1128manual page was written by Yar Tikhiy. 1129