1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd May 20, 2004 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Fn mtod "struct mbuf *mbuf" "type" 60.Ft int 61.Fn MEXT_IS_REF "struct mbuf *mbuf" 62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 64.Ft int 65.Fn M_LEADINGSPACE "struct mbuf *mbuf" 66.Ft int 67.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 71.Ft int 72.Fn M_WRITABLE "struct mbuf *mbuf" 73.\" 74.Ss Mbuf allocation functions 75.Ft struct mbuf * 76.Fn m_get "int how" "int type" 77.Ft struct mbuf * 78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 79.Ft struct mbuf * 80.Fn m_getcl "int how" "short type" "int flags" 81.Ft struct mbuf * 82.Fn m_getclr "int how" "int type" 83.Ft struct mbuf * 84.Fn m_gethdr "int how" "int type" 85.Ft struct mbuf * 86.Fn m_free "struct mbuf *mbuf" 87.Ft void 88.Fn m_freem "struct mbuf *mbuf" 89.\" 90.Ss Mbuf utility functions 91.Ft void 92.Fn m_adj "struct mbuf *mbuf" "int len" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_pullup "struct mbuf *mbuf" "int len" 97.Ft struct mbuf * 98.Fn 
m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 99.Ft struct mbuf * 100.Fn m_copypacket "struct mbuf *mbuf" "int how" 101.Ft struct mbuf * 102.Fn m_dup "struct mbuf *mbuf" "int how" 103.Ft void 104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 105.Ft void 106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft struct mbuf * 108.Fo m_devget 109.Fa "char *buf" 110.Fa "int len" 111.Fa "int offset" 112.Fa "struct ifnet *ifp" 113.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 114.Fc 115.Ft void 116.Fn m_cat "struct mbuf *m" "struct mbuf *n" 117.Ft u_int 118.Fn m_fixhdr "struct mbuf *mbuf" 119.Ft int 120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 121.Ft void 122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 123.Ft u_int 124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 125.Ft struct mbuf * 126.Fn m_split "struct mbuf *mbuf" "int len" "int how" 127.Ft int 128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 129.Ft struct mbuf * 130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 131.Ft struct mbuf * 132.Fn m_defrag "struct mbuf *m0" "int how" 133.\" 134.Sh DESCRIPTION 135An 136.Vt mbuf 137is a basic unit of memory management in the kernel IPC subsystem. 138Network packets and socket buffers are stored in 139.Vt mbufs . 140A network packet may span multiple 141.Vt mbufs 142arranged into a 143.Vt mbuf chain 144(linked list), 145which allows adding or trimming 146network headers with little overhead. 147.Pp 148While a developer should not bother with 149.Vt mbuf 150internals without serious 151reason in order to avoid incompatibilities with future changes, it 152is useful to understand the general structure of an 153.Vt mbuf . 154.Pp 155An 156.Vt mbuf 157consists of a variable-sized header and a small internal 158buffer for data. 
159The total size of an 160.Vt mbuf , 161.Dv MSIZE , 162is a constant defined in 163.In sys/param.h . 164The 165.Vt mbuf 166header includes: 167.Pp 168.Bl -tag -width "m_nextpkt" -offset indent 169.It Va m_next 170.Pq Vt struct mbuf * 171A pointer to the next 172.Vt mbuf 173in the 174.Vt mbuf chain . 175.It Va m_nextpkt 176.Pq Vt struct mbuf * 177A pointer to the next 178.Vt mbuf chain 179in the queue. 180.It Va m_data 181.Pq Vt caddr_t 182A pointer to data attached to this 183.Vt mbuf . 184.It Va m_len 185.Pq Vt int 186The length of the data. 187.It Va m_type 188.Pq Vt short 189The type of the data. 190.It Va m_flags 191.Pq Vt int 192The 193.Vt mbuf 194flags. 195.El 196.Pp 197The 198.Vt mbuf 199flag bits are defined as follows: 200.Bd -literal 201/* mbuf flags */ 202#define M_EXT 0x0001 /* has associated external storage */ 203#define M_PKTHDR 0x0002 /* start of record */ 204#define M_EOR 0x0004 /* end of record */ 205#define M_RDONLY 0x0008 /* associated data marked read-only */ 206#define M_PROTO1 0x0010 /* protocol-specific */ 207#define M_PROTO2 0x0020 /* protocol-specific */ 208#define M_PROTO3 0x0040 /* protocol-specific */ 209#define M_PROTO4 0x0080 /* protocol-specific */ 210#define M_PROTO5 0x0100 /* protocol-specific */ 211#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 212#define M_FREELIST 0x8000 /* mbuf is on the free list */ 213 214/* mbuf pkthdr flags (also stored in m_flags) */ 215#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 216#define M_MCAST 0x0400 /* send/received as link-level multicast */ 217#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 218#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 219#define M_LASTFRAG 0x2000 /* packet is last fragment */ 220.Ed 221.Pp 222The available 223.Vt mbuf 224types are defined as follows: 225.Bd -literal 226/* mbuf types */ 227#define MT_DATA 1 /* dynamic (data) allocation */ 228#define MT_HEADER 2 /* packet header */ 229#define MT_SONAME 
8 /* socket name */ 230#define MT_FTABLE 11 /* fragment reassembly header */ 231#define MT_TAG 13 /* volatile metadata associated to pkts */ 232#define MT_CONTROL 14 /* extra-data protocol message */ 233#define MT_OOBDATA 15 /* expedited data */ 234.Ed 235.Pp 236If the 237.Dv M_PKTHDR 238flag is set, a 239.Vt struct pkthdr Va m_pkthdr 240is added to the 241.Vt mbuf 242header. 243It contains a pointer to the interface 244the packet has been received from 245.Pq Vt struct ifnet Va *rcvif , 246and the total packet length 247.Pq Vt int Va len . 248Optionally, it may also contain an attached list of packet tags 249.Pq Vt "struct m_tag" . 250See 251.Xr mbuf_tags 9 252for details. 253Fields used in offloading checksum calculation to the hardware are kept in 254.Va m_pkthdr 255as well. 256See 257.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 258for details. 259.Pp 260If small enough, data is stored in the internal data buffer of an 261.Vt mbuf . 262If the data is sufficiently large, another 263.Vt mbuf 264may be added to the 265.Vt mbuf chain , 266or external storage may be associated with the 267.Vt mbuf . 268.Dv MHLEN 269bytes of data can fit into an 270.Vt mbuf 271with the 272.Dv M_PKTHDR 273flag set, 274.Dv MLEN 275bytes can otherwise. 276.Pp 277If external storage is being associated with an 278.Vt mbuf , 279the 280.Va m_ext 281header is added at the cost of losing the internal data buffer. 282It includes a pointer to external storage, the size of the storage, 283a pointer to a function used for freeing the storage, 284a pointer to an optional argument that can be passed to the function, 285and a pointer to a reference counter. 286An 287.Vt mbuf 288using external storage has the 289.Dv M_EXT 290flag set. 291.Pp 292The system supplies a macro for allocating the desired external storage 293buffer, 294.Dv MEXTADD . 295.Pp 296The allocation and management of the reference counter is handled by the 297subsystem. 
298The developer can check whether the reference count for the 299external storage of a given 300.Vt mbuf 301is greater than 1 with the 302.Dv MEXT_IS_REF 303macro. 304Similarly, the developer can directly add and remove references, 305if absolutely necessary, with the use of the 306.Dv MEXT_ADD_REF 307and 308.Dv MEXT_REM_REF 309macros. 310.Pp 311The system also supplies a default type of external storage buffer called an 312.Vt mbuf cluster . 313.Vt Mbuf clusters 314can be allocated and configured with the use of the 315.Dv MCLGET 316macro. 317Each 318.Vt mbuf cluster 319is 320.Dv MCLBYTES 321in size, where MCLBYTES is a machine-dependent constant. 322The system defines an advisory macro 323.Dv MINCLSIZE , 324which is the smallest amount of data to put into an 325.Vt mbuf cluster . 326It's equal to the sum of 327.Dv MLEN 328and 329.Dv MHLEN . 330It is typically preferable to store data into the data region of an 331.Vt mbuf , 332if size permits, as opposed to allocating a separate 333.Vt mbuf cluster 334to hold the same data. 335.\" 336.Ss Macros and Functions 337There are numerous predefined macros and functions that provide the 338developer with common utilities. 339.\" 340.Bl -ohang -offset indent 341.It Fn mtod mbuf type 342Convert an 343.Fa mbuf 344pointer to a data pointer. 345The macro expands to the data pointer cast to the pointer of the specified 346.Fa type . 347.Sy Note : 348It is advisable to ensure that there is enough contiguous data in 349.Fa mbuf . 350See 351.Fn m_pullup 352for details. 353.It Fn MGET mbuf how type 354Allocate an 355.Vt mbuf 356and initialize it to contain internal data. 357.Fa mbuf 358will point to the allocated 359.Vt mbuf 360on success, or be set to 361.Dv NULL 362on failure. 363The 364.Fa how 365argument is to be set to 366.Dv M_TRYWAIT 367or 368.Dv M_DONTWAIT . 369It specifies whether the caller is willing to block if necessary. 
370If 371.Fa how 372is set to 373.Dv M_TRYWAIT , 374a failed allocation will result in the caller being put 375to sleep for a designated 376kern.ipc.mbuf_wait 377.Xr ( sysctl 8 378tunable) 379number of ticks. 380A number of other functions and macros related to 381.Vt mbufs 382have the same argument because they may 383at some point need to allocate new 384.Vt mbufs . 385.Pp 386Programmers should be careful not to confuse the 387.Vt mbuf 388allocation flag 389.Dv M_DONTWAIT 390with the 391.Xr malloc 9 392allocation flag, 393.Dv M_NOWAIT . 394They are not the same. 395.It Fn MGETHDR mbuf how type 396Allocate an 397.Vt mbuf 398and initialize it to contain a packet header 399and internal data. 400See 401.Fn MGET 402for details. 403.It Fn MCLGET mbuf how 404Allocate and attach an 405.Vt mbuf cluster 406to 407.Fa mbuf . 408If the macro fails, the 409.Dv M_EXT 410flag won't be set in 411.Fa mbuf . 412.It Fn M_ALIGN mbuf len 413Set the pointer 414.Fa mbuf->m_data 415to place an object of the size 416.Fa len 417at the end of the internal data area of 418.Fa mbuf , 419long word aligned. 420Applicable only if 421.Fa mbuf 422is newly allocated with 423.Fn MGET 424or 425.Fn m_get . 426.It Fn MH_ALIGN mbuf len 427Serves the same purpose as 428.Fn M_ALIGN 429does, but only for 430.Fa mbuf 431newly allocated with 432.Fn MGETHDR 433or 434.Fn m_gethdr , 435or initialized by 436.Fn m_dup_pkthdr 437or 438.Fn m_move_pkthdr . 439.It Fn M_LEADINGSPACE mbuf 440Returns the number of bytes available before the beginning 441of data in 442.Fa mbuf . 443.It Fn M_TRAILINGSPACE mbuf 444Returns the number of bytes available after the end of data in 445.Fa mbuf . 446.It Fn M_PREPEND mbuf len how 447This macro operates on an 448.Vt mbuf chain . 449It is an optimized wrapper for 450.Fn m_prepend 451that can make use of possible empty space before data 452(e.g.\& left after trimming of a link-layer header). 
453The new 454.Vt mbuf chain 455pointer or 456.Dv NULL 457is in 458.Fa mbuf 459after the call. 460.It Fn M_MOVE_PKTHDR to from 461Using this macro is equivalent to calling 462.Fn m_move_pkthdr to from . 463.It Fn M_WRITABLE mbuf 464This macro will evaluate true if 465.Fa mbuf 466is not marked 467.Dv M_RDONLY 468and if either 469.Fa mbuf 470does not contain external storage or, 471if it does, 472then if the reference count of the storage is not greater than 1. 473The 474.Dv M_RDONLY 475flag can be set in 476.Fa mbuf->m_flags . 477This can be achieved during setup of the external storage, 478by passing the 479.Dv M_RDONLY 480bit as a 481.Fa flags 482argument to the 483.Fn MEXTADD 484macro, or can be directly set in individual 485.Vt mbufs . 486.It Fn MCHTYPE mbuf type 487Change the type of 488.Fa mbuf 489to 490.Fa type . 491This is a relatively expensive operation and should be avoided. 492.El 493.Pp 494The functions are: 495.Bl -ohang -offset indent 496.It Fn m_get how type 497A function version of 498.Fn MGET 499for non-critical paths. 500.It Fn m_getm orig len how type 501Allocate 502.Fa len 503bytes worth of 504.Vt mbufs 505and 506.Vt mbuf clusters 507if necessary and append the resulting allocated 508.Vt mbuf chain 509to the 510.Vt mbuf chain 511.Fa orig , 512if it is 513.No non- Ns Dv NULL . 514If the allocation fails at any point, 515free whatever was allocated and return 516.Dv NULL . 517If 518.Fa orig 519is 520.No non- Ns Dv NULL , 521it will not be freed. 522It is possible to use 523.Fn m_getm 524to either append 525.Fa len 526bytes to an existing 527.Vt mbuf 528or 529.Vt mbuf chain 530(for example, one which may be sitting in a pre-allocated ring) 531or to simply perform an all-or-nothing 532.Vt mbuf 533and 534.Vt mbuf cluster 535allocation. 536.It Fn m_gethdr how type 537A function version of 538.Fn MGETHDR 539for non-critical paths. 540.It Fn m_getcl how type flags 541Fetch an 542.Vt mbuf 543with a 544.Vt mbuf cluster 545attached to it. 
546If one of the allocations fails, the entire allocation fails. 547This routine is the preferred way of fetching both the 548.Vt mbuf 549and 550.Vt mbuf cluster 551together, as it avoids having to unlock/relock between allocations. 552Returns 553.Dv NULL 554on failure. 555.It Fn m_getclr how type 556Allocate an 557.Vt mbuf 558and zero out the data region. 559.It Fn m_free mbuf 560Frees 561.Vt mbuf . 562.El 563.Pp 564The functions below operate on 565.Vt mbuf chains . 566.Bl -ohang -offset indent 567.It Fn m_freem mbuf 568Free an entire 569.Vt mbuf chain , 570including any external storage. 571.\" 572.It Fn m_adj mbuf len 573Trim 574.Fa len 575bytes from the head of an 576.Vt mbuf chain 577if 578.Fa len 579is positive, from the tail otherwise. 580.\" 581.It Fn m_prepend mbuf len how 582Allocate a new 583.Vt mbuf 584and prepend it to the 585.Vt mbuf chain , 586handling 587.Dv M_PKTHDR 588properly. 589.Sy Note : 590It doesn't allocate any 591.Vt mbuf clusters , 592so 593.Fa len 594must be less than 595.Dv MLEN 596or 597.Dv MHLEN , 598depending on the 599.Dv M_PKTHDR 600flag setting. 601.\" 602.It Fn m_pullup mbuf len 603Arrange that the first 604.Fa len 605bytes of an 606.Vt mbuf chain 607are contiguous and lie in the data area of 608.Fa mbuf , 609so they are accessible with 610.Fn mtod mbuf type . 611Return the new 612.Vt mbuf chain 613on success, 614.Dv NULL 615on failure 616(the 617.Vt mbuf chain 618is freed in this case). 619.Sy Note : 620It doesn't allocate any 621.Vt mbuf clusters , 622so 623.Fa len 624must be less than 625.Dv MHLEN . 626.\" 627.It Fn m_copym mbuf offset len how 628Make a copy of an 629.Vt mbuf chain 630starting 631.Fa offset 632bytes from the beginning, continuing for 633.Fa len 634bytes. 635If 636.Fa len 637is 638.Dv M_COPYALL , 639copy to the end of the 640.Vt mbuf chain . 641.Sy Note : 642The copy is read-only, because the 643.Vt mbuf clusters 644are not copied, only their reference counts are incremented. 
645.\" 646.It Fn m_copypacket mbuf how 647Copy an entire packet including header, which must be present. 648This is an optimized version of the common case 649.Fn m_copym mbuf 0 M_COPYALL how . 650.Sy Note : 651The copy is read-only, because the 652.Vt mbuf clusters 653are not copied, only their reference counts are incremented. 654.\" 655.It Fn m_dup mbuf how 656Copy a packet header 657.Vt mbuf chain 658into a completely new 659.Vt mbuf chain , 660including copying any 661.Vt mbuf clusters . 662Use this instead of 663.Fn m_copypacket 664when you need a writable copy of an 665.Vt mbuf chain . 666.\" 667.It Fn m_copydata mbuf offset len buf 668Copy data from an 669.Vt mbuf chain 670starting 671.Fa offset 672bytes from the beginning, continuing for 673.Fa len 674bytes, into the indicated buffer 675.Fa buf . 676.\" 677.It Fn m_copyback mbuf offset len buf 678Copy 679.Fa len 680bytes from the buffer 681.Fa buf 682back into the indicated 683.Vt mbuf chain , 684starting at 685.Fa offset 686bytes from the beginning of the 687.Vt mbuf chain , 688extending the 689.Vt mbuf chain 690if necessary. 691.Sy Note : 692It doesn't allocate any 693.Vt mbuf clusters , 694just adds 695.Vt mbufs 696to the 697.Vt mbuf chain . 698It's safe to set 699.Fa offset 700beyond the current 701.Vt mbuf chain 702end: zeroed 703.Vt mbufs 704will be allocated to fill the space. 705.\" 706.It Fn m_length mbuf last 707Return the length of the 708.Vt mbuf chain , 709and optionally a pointer to the last 710.Vt mbuf . 711.\" 712.It Fn m_dup_pkthdr to from how 713Upon the function's completion, the 714.Vt mbuf 715.Fa to 716will contain an identical copy of 717.Fa from->m_pkthdr 718and the per-packet attributes found in the 719.Vt mbuf chain 720.Fa from . 721The 722.Vt mbuf 723.Fa from 724must have the flag 725.Dv M_PKTHDR 726initially set, and 727.Fa to 728must be empty on entry. 
729.\" 730.It Fn m_move_pkthdr to from 731Move 732.Va m_pkthdr 733and the per-packet attributes from the 734.Vt mbuf chain 735.Fa from 736to the 737.Vt mbuf 738.Fa to . 739The 740.Vt mbuf 741.Fa from 742must have the flag 743.Dv M_PKTHDR 744initially set, and 745.Fa to 746must be empty on entry. 747Upon the function's completion, 748.Fa from 749will have the flag 750.Dv M_PKTHDR 751and the per-packet attributes cleared. 752.\" 753.It Fn m_fixhdr mbuf 754Set the packet-header length to the length of the 755.Vt mbuf chain . 756.\" 757.It Fn m_devget buf len offset ifp copy 758Copy data from device local memory pointed to by 759.Fa buf 760to an 761.Vt mbuf chain . 762The copy is done using a specified copy routine 763.Fa copy , 764or 765.Fn bcopy 766if 767.Fa copy 768is 769.Dv NULL . 770.\" 771.It Fn m_cat m n 772Concatenate 773.Fa n 774to 775.Fa m . 776Both 777.Vt mbuf chains 778must be of the same type. 779.Fa N 780is not guaranteed to be valid after the function returns. 781.Sy Note : 782It does not handle 783.Dv M_PKTHDR 784and friends. 785.\" 786.It Fn m_split mbuf len how 787Partition an 788.Vt mbuf chain 789in two pieces, returning the tail: 790all but the first 791.Fa len 792bytes. 793In case of failure, it returns 794.Dv NULL 795and attempts to restore the 796.Vt mbuf chain 797to its original state. 798.\" 799.It Fn m_apply mbuf off len f arg 800Apply a function to an 801.Vt mbuf chain , 802at offset 803.Fa off , 804for length 805.Fa len 806bytes. 807Typically used to avoid calls to 808.Fn m_pullup 809which would otherwise be unnecessary or undesirable. 810.Fa arg 811is a convenience argument which is passed to the callback function 812.Fa f . 813.Pp 814Each time 815.Fn f 816is called, it will be passed 817.Fa arg , 818a pointer to the 819.Fa data 820in the current mbuf, and the length 821.Fa len 822of the data in this mbuf to which the function should be applied. 
823.Pp 824The function should return zero to indicate success; 825otherwise, if an error is indicated, then 826.Fn m_apply 827will return the error and stop iterating through the 828.Vt mbuf chain . 829.\" 830.It Fn m_getptr mbuf loc off 831Return a pointer to the mbuf containing the data located at 832.Fa loc 833bytes from the beginning of the 834.Vt mbuf chain . 835The corresponding offset into the mbuf will be stored in 836.Fa *off . 837.It Fn m_defrag m0 how 838Defragment an mbuf chain, returning the shortest possible 839chain of mbufs and clusters. 840If allocation fails and this can not be completed, 841.Dv NULL 842will be returned and the original chain will be unchanged. 843Upon success, the original chain will be freed and the new 844chain will be returned. 845.Fa how 846should be either 847.Dv M_TRYWAIT 848or 849.Dv M_DONTWAIT , 850depending on the caller's preference. 851.Pp 852This function is especially useful in network drivers, where 853certain long mbuf chains must be shortened before being added 854to TX descriptor lists. 855.El 856.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 857This section currently applies to TCP/IP only. 858In order to save the host CPU resources, computing checksums is 859offloaded to the network interface hardware if possible. 860The 861.Va m_pkthdr 862member of the leading 863.Vt mbuf 864of a packet contains two fields used for that purpose, 865.Vt int Va csum_flags 866and 867.Vt int Va csum_data . 868The meaning of those fields depends on the direction a packet flows in, 869and on whether the packet is fragmented. 870Henceforth, 871.Va csum_flags 872or 873.Va csum_data 874of a packet 875will denote the corresponding field of the 876.Va m_pkthdr 877member of the leading 878.Vt mbuf 879in the 880.Vt mbuf chain 881containing the packet. 882.Pp 883On output, checksum offloading is attempted after the outgoing 884interface has been determined for a packet. 
885The interface-specific field 886.Va ifnet.if_data.ifi_hwassist 887(see 888.Xr ifnet 9 ) 889is consulted for the capabilities of the interface to assist in 890computing checksums. 891The 892.Va csum_flags 893field of the packet header is set to indicate which actions the interface 894is supposed to perform on it. 895The actions unsupported by the network interface are done in the 896software prior to passing the packet down to the interface driver; 897such actions will never be requested through 898.Va csum_flags . 899.Pp 900The flags demanding a particular action from an interface are as follows: 901.Bl -tag -width ".Dv CSUM_TCP" -offset indent 902.It Dv CSUM_IP 903The IP header checksum is to be computed and stored in the 904corresponding field of the packet. 905The hardware is expected to know the format of an IP header 906to determine the offset of the IP checksum field. 907.It Dv CSUM_TCP 908The TCP checksum is to be computed. 909(See below.) 910.It Dv CSUM_UDP 911The UDP checksum is to be computed. 912(See below.) 913.El 914.Pp 915Should a TCP or UDP checksum be offloaded to the hardware, 916the field 917.Va csum_data 918will contain the byte offset of the checksum field relative to the 919end of the IP header. 920In this case, the checksum field will be initially 921set by the TCP/IP module to the checksum of the pseudo header 922defined by the TCP and UDP specifications. 923.Pp 924For outbound packets which have been fragmented 925by the host CPU, the following will also be true, 926regardless of the checksum flag settings: 927.Bl -bullet -offset indent 928.It 929all fragments will have the flag 930.Dv M_FRAG 931set in their 932.Va m_flags 933field; 934.It 935the first and the last fragments in the chain will have 936.Dv M_FIRSTFRAG 937or 938.Dv M_LASTFRAG 939set in their 940.Va m_flags , 941correspondingly; 942.It 943the first fragment in the chain will have the total number 944of fragments contained in its 945.Va csum_data 946field. 
947.El 948.Pp 949The last rule for fragmented packets takes precedence over the one 950for a TCP or UDP checksum. 951Nevertheless, offloading a TCP or UDP checksum is possible for a 952fragmented packet if the flag 953.Dv CSUM_IP_FRAGS 954is set in the field 955.Va ifnet.if_data.ifi_hwassist 956associated with the network interface. 957However, in this case the interface is expected to figure out 958the location of the checksum field within the sequence of fragments 959by itself because 960.Va csum_data 961contains a fragment count instead of a checksum offset value. 962.Pp 963On input, an interface indicates the actions it has performed 964on a packet by setting one or more of the following flags in 965.Va csum_flags 966associated with the packet: 967.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 968.It Dv CSUM_IP_CHECKED 969The IP header checksum has been computed. 970.It Dv CSUM_IP_VALID 971The IP header has a valid checksum. 972This flag can appear only in combination with 973.Dv CSUM_IP_CHECKED . 974.It Dv CSUM_DATA_VALID 975The checksum of the data portion of the IP packet has been computed 976and stored in the field 977.Va csum_data 978in network byte order. 979.It Dv CSUM_PSEUDO_HDR 980Can be set only along with 981.Dv CSUM_DATA_VALID 982to indicate that the IP data checksum found in 983.Va csum_data 984allows for the pseudo header defined by the TCP and UDP specifications. 985Otherwise the checksum of the pseudo header must be calculated by 986the host CPU and added to 987.Va csum_data 988to obtain the final checksum to be used for TCP or UDP validation purposes. 989.El 990.Pp 991If a particular network interface just indicates success or 992failure of TCP or UDP checksum validation without returning 993the exact value of the checksum to the host CPU, its driver can mark 994.Dv CSUM_DATA_VALID 995and 996.Dv CSUM_PSEUDO_HDR 997in 998.Va csum_flags , 999and set 1000.Va csum_data 1001to 1002.Li 0xFFFF 1003hexadecimal to indicate a valid checksum. 
1004It is a peculiarity of the algorithm used that the Internet checksum 1005calculated over any valid packet will be 1006.Li 0xFFFF 1007as long as the original checksum field is included. 1008.Pp 1009For inbound packets which are IP fragments, all 1010.Va csum_data 1011fields will be summed during reassembly to obtain the final checksum 1012value passed to an upper layer in the 1013.Va csum_data 1014field of the reassembled packet. 1015The 1016.Va csum_flags 1017fields of all fragments will be consolidated using logical AND 1018to obtain the final value for 1019.Va csum_flags . 1020Thus, in order to successfully 1021offload checksum computation for fragmented data, 1022all fragments should have the same value of 1023.Va csum_flags . 1024.Sh STRESS TESTING 1025When running a kernel compiled with the option 1026.Dv MBUF_STRESS_TEST , 1027the following 1028.Xr sysctl 8 Ns 1029-controlled options may be used to create 1030various failure/extreme cases for testing of network drivers 1031and other parts of the kernel that rely on 1032.Vt mbufs . 1033.Bl -tag -width ident 1034.It Va net.inet.ip.mbuf_frag_size 1035Causes 1036.Fn ip_output 1037to fragment outgoing 1038.Vt mbuf chains 1039into fragments of the specified size. 1040Setting this variable to 1 is an excellent way to 1041test the long 1042.Vt mbuf chain 1043handling ability of network drivers. 1044.It Va kern.ipc.m_defragrandomfailures 1045Causes the function 1046.Fn m_defrag 1047to randomly fail, returning 1048.Dv NULL . 1049Any piece of code which uses 1050.Fn m_defrag 1051should be tested with this feature. 1052.El 1053.Sh RETURN VALUES 1054See above. 1055.Sh SEE ALSO 1056.Xr ifnet 9 , 1057.Xr mbuf_tags 9 1058.Sh HISTORY 1059.\" Please correct me if I'm wrong 1060.Vt Mbufs 1061appeared in an early version of 1062.Bx . 1063Besides being used for network packets, they were used 1064to store various dynamic structures, such as routing table 1065entries, interface addresses, protocol control blocks, etc. 
1066.Sh AUTHORS 1067The original 1068.Nm 1069man page was written by Yar Tikhiy. 1070