1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd May 20, 2004 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Fn mtod "struct mbuf *mbuf" "type" 60.Ft int 61.Fn MEXT_IS_REF "struct mbuf *mbuf" 62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 64.Ft int 65.Fn M_LEADINGSPACE "struct mbuf *mbuf" 66.Ft int 67.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 71.Ft int 72.Fn M_WRITABLE "struct mbuf *mbuf" 73.\" 74.Ss Mbuf allocation functions 75.Ft struct mbuf * 76.Fn m_get "int how" "int type" 77.Ft struct mbuf * 78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 79.Ft struct mbuf * 80.Fn m_getcl "int how" "short type" "int flags" 81.Ft struct mbuf * 82.Fn m_getclr "int how" "int type" 83.Ft struct mbuf * 84.Fn m_gethdr "int how" "int type" 85.Ft struct mbuf * 86.Fn m_free "struct mbuf *mbuf" 87.Ft void 88.Fn m_freem "struct mbuf *mbuf" 89.\" 90.Ss Mbuf utility functions 91.Ft void 92.Fn m_adj "struct mbuf *mbuf" "int len" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_pullup "struct mbuf *mbuf" "int len" 97.Ft struct mbuf * 98.Fn 
m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 99.Ft struct mbuf * 100.Fn m_copypacket "struct mbuf *mbuf" "int how" 101.Ft struct mbuf * 102.Fn m_dup "struct mbuf *mbuf" "int how" 103.Ft void 104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 105.Ft void 106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft struct mbuf * 108.Fo m_devget 109.Fa "char *buf" 110.Fa "int len" 111.Fa "int offset" 112.Fa "struct ifnet *ifp" 113.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 114.Fc 115.Ft void 116.Fn m_cat "struct mbuf *m" "struct mbuf *n" 117.Ft u_int 118.Fn m_fixhdr "struct mbuf *mbuf" 119.Ft void 120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how" 121.Ft void 122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 123.Ft u_int 124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 125.Ft struct mbuf * 126.Fn m_split "struct mbuf *mbuf" "int len" "int how" 127.Ft int 128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 129.Ft struct mbuf * 130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 131.Ft struct mbuf * 132.Fn m_defrag "struct mbuf *m0" "int how" 133.\" 134.Sh DESCRIPTION 135An 136.Vt mbuf 137is a basic unit of memory management in the kernel IPC subsystem. 138Network packets and socket buffers are stored in 139.Vt mbufs . 140A network packet may span multiple 141.Vt mbufs 142arranged into a 143.Vt mbuf chain 144(linked list), 145which allows adding or trimming 146network headers with little overhead. 147.Pp 148While a developer should not bother with 149.Vt mbuf 150internals without serious 151reason in order to avoid incompatibilities with future changes, it 152is useful to understand the general structure of an 153.Vt mbuf . 154.Pp 155An 156.Vt mbuf 157consists of a variable-sized header and a small internal 158buffer for data. 
159The total size of an 160.Vt mbuf , 161.Dv MSIZE , 162is a constant defined in 163.In sys/param.h . 164The 165.Vt mbuf 166header includes: 167.Pp 168.Bl -tag -width "m_nextpkt" -offset indent 169.It Va m_next 170.Pq Vt struct mbuf * 171A pointer to the next 172.Vt mbuf 173in the 174.Vt mbuf chain . 175.It Va m_nextpkt 176.Pq Vt struct mbuf * 177A pointer to the next 178.Vt mbuf chain 179in the queue. 180.It Va m_data 181.Pq Vt caddr_t 182A pointer to data attached to this 183.Vt mbuf . 184.It Va m_len 185.Pq Vt int 186The length of the data. 187.It Va m_type 188.Pq Vt short 189The type of the data. 190.It Va m_flags 191.Pq Vt int 192The 193.Vt mbuf 194flags. 195.El 196.Pp 197The 198.Vt mbuf 199flag bits are defined as follows: 200.Bd -literal 201/* mbuf flags */ 202#define M_EXT 0x0001 /* has associated external storage */ 203#define M_PKTHDR 0x0002 /* start of record */ 204#define M_EOR 0x0004 /* end of record */ 205#define M_RDONLY 0x0008 /* associated data marked read-only */ 206#define M_PROTO1 0x0010 /* protocol-specific */ 207#define M_PROTO2 0x0020 /* protocol-specific */ 208#define M_PROTO3 0x0040 /* protocol-specific */ 209#define M_PROTO4 0x0080 /* protocol-specific */ 210#define M_PROTO5 0x0100 /* protocol-specific */ 211#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 212#define M_FREELIST 0x8000 /* mbuf is on the free list */ 213 214/* mbuf pkthdr flags (also stored in m_flags) */ 215#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 216#define M_MCAST 0x0400 /* send/received as link-level multicast */ 217#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 218#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 219#define M_LASTFRAG 0x2000 /* packet is last fragment */ 220.Ed 221.Pp 222The available 223.Vt mbuf 224types are defined as follows: 225.Bd -literal 226/* mbuf types */ 227#define MT_DATA 1 /* dynamic (data) allocation */ 228#define MT_HEADER 2 /* packet header */ 229#define MT_SONAME 
8 /* socket name */ 230#define MT_FTABLE 11 /* fragment reassembly header */ 231#define MT_CONTROL 14 /* extra-data protocol message */ 232#define MT_OOBDATA 15 /* expedited data */ 233.Ed 234.Pp 235If the 236.Dv M_PKTHDR 237flag is set, a 238.Vt struct pkthdr Va m_pkthdr 239is added to the 240.Vt mbuf 241header. 242It contains a pointer to the interface 243the packet has been received from 244.Pq Vt struct ifnet Va *rcvif , 245and the total packet length 246.Pq Vt int Va len . 247Optionally, it may also contain an attached list of packet tags 248.Pq Vt "struct m_tag" . 249See 250.Xr mbuf_tags 9 251for details. 252Fields used in offloading checksum calculation to the hardware are kept in 253.Va m_pkthdr 254as well. 255See 256.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 257for details. 258.Pp 259If small enough, data is stored in the internal data buffer of an 260.Vt mbuf . 261If the data is sufficiently large, another 262.Vt mbuf 263may be added to the 264.Vt mbuf chain , 265or external storage may be associated with the 266.Vt mbuf . 267.Dv MHLEN 268bytes of data can fit into an 269.Vt mbuf 270with the 271.Dv M_PKTHDR 272flag set, 273.Dv MLEN 274bytes can otherwise. 275.Pp 276If external storage is being associated with an 277.Vt mbuf , 278the 279.Va m_ext 280header is added at the cost of losing the internal data buffer. 281It includes a pointer to external storage, the size of the storage, 282a pointer to a function used for freeing the storage, 283a pointer to an optional argument that can be passed to the function, 284and a pointer to a reference counter. 285An 286.Vt mbuf 287using external storage has the 288.Dv M_EXT 289flag set. 290.Pp 291The system supplies a macro for allocating the desired external storage 292buffer, 293.Dv MEXTADD . 294.Pp 295The allocation and management of the reference counter is handled by the 296subsystem. 
297The developer can check whether the reference count for the 298external storage of a given 299.Vt mbuf 300is greater than 1 with the 301.Dv MEXT_IS_REF 302macro. 303Similarly, the developer can directly add and remove references, 304if absolutely necessary, with the use of the 305.Dv MEXT_ADD_REF 306and 307.Dv MEXT_REM_REF 308macros. 309.Pp 310The system also supplies a default type of external storage buffer called an 311.Vt mbuf cluster . 312.Vt Mbuf clusters 313can be allocated and configured with the use of the 314.Dv MCLGET 315macro. 316Each 317.Vt mbuf cluster 318is 319.Dv MCLBYTES 320in size, where MCLBYTES is a machine-dependent constant. 321The system defines an advisory macro 322.Dv MINCLSIZE , 323which is the smallest amount of data to put into an 324.Vt mbuf cluster . 325It's equal to the sum of 326.Dv MLEN 327and 328.Dv MHLEN . 329It is typically preferable to store data into the data region of an 330.Vt mbuf , 331if size permits, as opposed to allocating a separate 332.Vt mbuf cluster 333to hold the same data. 334.\" 335.Ss Macros and Functions 336There are numerous predefined macros and functions that provide the 337developer with common utilities. 338.\" 339.Bl -ohang -offset indent 340.It Fn mtod mbuf type 341Convert an 342.Fa mbuf 343pointer to a data pointer. 344The macro expands to the data pointer cast to the pointer of the specified 345.Fa type . 346.Sy Note : 347It is advisable to ensure that there is enough contiguous data in 348.Fa mbuf . 349See 350.Fn m_pullup 351for details. 352.It Fn MGET mbuf how type 353Allocate an 354.Vt mbuf 355and initialize it to contain internal data. 356.Fa mbuf 357will point to the allocated 358.Vt mbuf 359on success, or be set to 360.Dv NULL 361on failure. 362The 363.Fa how 364argument is to be set to 365.Dv M_TRYWAIT 366or 367.Dv M_DONTWAIT . 368It specifies whether the caller is willing to block if necessary. 
369If 370.Fa how 371is set to 372.Dv M_TRYWAIT , 373a failed allocation will result in the caller being put 374to sleep for a designated 375kern.ipc.mbuf_wait 376.Xr ( sysctl 8 377tunable) 378number of ticks. 379A number of other functions and macros related to 380.Vt mbufs 381have the same argument because they may 382at some point need to allocate new 383.Vt mbufs . 384.Pp 385Programmers should be careful not to confuse the 386.Vt mbuf 387allocation flag 388.Dv M_DONTWAIT 389with the 390.Xr malloc 9 391allocation flag, 392.Dv M_NOWAIT . 393They are not the same. 394.It Fn MGETHDR mbuf how type 395Allocate an 396.Vt mbuf 397and initialize it to contain a packet header 398and internal data. 399See 400.Fn MGET 401for details. 402.It Fn MCLGET mbuf how 403Allocate and attach an 404.Vt mbuf cluster 405to 406.Fa mbuf . 407If the macro fails, the 408.Dv M_EXT 409flag won't be set in 410.Fa mbuf . 411.It Fn M_ALIGN mbuf len 412Set the pointer 413.Fa mbuf->m_data 414to place an object of the size 415.Fa len 416at the end of the internal data area of 417.Fa mbuf , 418long word aligned. 419Applicable only if 420.Fa mbuf 421is newly allocated with 422.Fn MGET 423or 424.Fn m_get . 425.It Fn MH_ALIGN mbuf len 426Serves the same purpose as 427.Fn M_ALIGN 428does, but only for 429.Fa mbuf 430newly allocated with 431.Fn MGETHDR 432or 433.Fn m_gethdr , 434or initialized by 435.Fn m_dup_pkthdr 436or 437.Fn m_move_pkthdr . 438.It Fn M_LEADINGSPACE mbuf 439Returns the number of bytes available before the beginning 440of data in 441.Fa mbuf . 442.It Fn M_TRAILINGSPACE mbuf 443Returns the number of bytes available after the end of data in 444.Fa mbuf . 445.It Fn M_PREPEND mbuf len how 446This macro operates on an 447.Vt mbuf chain . 448It is an optimized wrapper for 449.Fn m_prepend 450that can make use of possible empty space before data 451(e.g.\& left after trimming of a link-layer header). 
452The new 453.Vt mbuf chain 454pointer or 455.Dv NULL 456is in 457.Fa mbuf 458after the call. 459.It Fn M_MOVE_PKTHDR to from 460Using this macro is equivalent to calling 461.Fn m_move_pkthdr to from . 462.It Fn M_WRITABLE mbuf 463This macro will evaluate true if 464.Fa mbuf 465is not marked 466.Dv M_RDONLY 467and if either 468.Fa mbuf 469does not contain external storage or, 470if it does, 471then if the reference count of the storage is not greater than 1. 472The 473.Dv M_RDONLY 474flag can be set in 475.Fa mbuf->m_flags . 476This can be achieved during setup of the external storage, 477by passing the 478.Dv M_RDONLY 479bit as a 480.Fa flags 481argument to the 482.Fn MEXTADD 483macro, or can be directly set in individual 484.Vt mbufs . 485.It Fn MCHTYPE mbuf type 486Change the type of 487.Fa mbuf 488to 489.Fa type . 490This is a relatively expensive operation and should be avoided. 491.El 492.Pp 493The functions are: 494.Bl -ohang -offset indent 495.It Fn m_get how type 496A function version of 497.Fn MGET 498for non-critical paths. 499.It Fn m_getm orig len how type 500Allocate 501.Fa len 502bytes worth of 503.Vt mbufs 504and 505.Vt mbuf clusters 506if necessary and append the resulting allocated 507.Vt mbuf chain 508to the 509.Vt mbuf chain 510.Fa orig , 511if it is 512.No non- Ns Dv NULL . 513If the allocation fails at any point, 514free whatever was allocated and return 515.Dv NULL . 516If 517.Fa orig 518is 519.No non- Ns Dv NULL , 520it will not be freed. 521It is possible to use 522.Fn m_getm 523to either append 524.Fa len 525bytes to an existing 526.Vt mbuf 527or 528.Vt mbuf chain 529(for example, one which may be sitting in a pre-allocated ring) 530or to simply perform an all-or-nothing 531.Vt mbuf 532and 533.Vt mbuf cluster 534allocation. 535.It Fn m_gethdr how type 536A function version of 537.Fn MGETHDR 538for non-critical paths. 539.It Fn m_getcl how type flags 540Fetch an 541.Vt mbuf 542with a 543.Vt mbuf cluster 544attached to it. 
545If one of the allocations fails, the entire allocation fails. 546This routine is the preferred way of fetching both the 547.Vt mbuf 548and 549.Vt mbuf cluster 550together, as it avoids having to unlock/relock between allocations. 551Returns 552.Dv NULL 553on failure. 554.It Fn m_getclr how type 555Allocate an 556.Vt mbuf 557and zero out the data region. 558.It Fn m_free mbuf 559Frees 560.Vt mbuf . 561.El 562.Pp 563The functions below operate on 564.Vt mbuf chains . 565.Bl -ohang -offset indent 566.It Fn m_freem mbuf 567Free an entire 568.Vt mbuf chain , 569including any external storage. 570.\" 571.It Fn m_adj mbuf len 572Trim 573.Fa len 574bytes from the head of an 575.Vt mbuf chain 576if 577.Fa len 578is positive, from the tail otherwise. 579.\" 580.It Fn m_prepend mbuf len how 581Allocate a new 582.Vt mbuf 583and prepend it to the 584.Vt mbuf chain , 585handle 586.Dv M_PKTHDR 587properly. 588.Sy Note : 589It doesn't allocate any 590.Vt mbuf clusters , 591so 592.Fa len 593must be less than 594.Dv MLEN 595or 596.Dv MHLEN , 597depending on the 598.Dv M_PKTHDR 599flag setting. 600.\" 601.It Fn m_pullup mbuf len 602Arrange that the first 603.Fa len 604bytes of an 605.Vt mbuf chain 606are contiguous and lie in the data area of 607.Fa mbuf , 608so they are accessible with 609.Fn mtod mbuf type . 610Return the new 611.Vt mbuf chain 612on success, 613.Dv NULL 614on failure 615(the 616.Vt mbuf chain 617is freed in this case). 618.Sy Note : 619It doesn't allocate any 620.Vt mbuf clusters , 621so 622.Fa len 623must be less than 624.Dv MHLEN . 625.\" 626.It Fn m_copym mbuf offset len how 627Make a copy of an 628.Vt mbuf chain 629starting 630.Fa offset 631bytes from the beginning, continuing for 632.Fa len 633bytes. 634If 635.Fa len 636is 637.Dv M_COPYALL , 638copy to the end of the 639.Vt mbuf chain . 640.Sy Note : 641The copy is read-only, because the 642.Vt mbuf clusters 643are not copied, only their reference counts are incremented. 
644.\" 645.It Fn m_copypacket mbuf how 646Copy an entire packet including header, which must be present. 647This is an optimized version of the common case 648.Fn m_copym mbuf 0 M_COPYALL how . 649.Sy Note : 650the copy is read-only, because the 651.Vt mbuf clusters 652are not copied, only their reference counts are incremented. 653.\" 654.It Fn m_dup mbuf how 655Copy a packet header 656.Vt mbuf chain 657into a completely new 658.Vt mbuf chain , 659including copying any 660.Vt mbuf clusters . 661Use this instead of 662.Fn m_copypacket 663when you need a writable copy of an 664.Vt mbuf chain . 665.\" 666.It Fn m_copydata mbuf offset len buf 667Copy data from an 668.Vt mbuf chain 669starting 670.Fa offset 671bytes from the beginning, continuing for 672.Fa len 673bytes, into the indicated buffer 674.Fa buf . 675.\" 676.It Fn m_copyback mbuf offset len buf 677Copy 678.Fa len 679bytes from the buffer 680.Fa buf 681back into the indicated 682.Vt mbuf chain , 683starting at 684.Fa offset 685bytes from the beginning of the 686.Vt mbuf chain , 687extending the 688.Vt mbuf chain 689if necessary. 690.Sy Note : 691It doesn't allocate any 692.Vt mbuf clusters , 693just adds 694.Vt mbufs 695to the 696.Vt mbuf chain . 697It's safe to set 698.Fa offset 699beyond the current 700.Vt mbuf chain 701end: zeroed 702.Vt mbufs 703will be allocated to fill the space. 704.\" 705.It Fn m_length mbuf last 706Return the length of the 707.Vt mbuf chain , 708and optionally a pointer to the last 709.Vt mbuf . 710.\" 711.It Fn m_dup_pkthdr to from how 712Upon the function's completion, the 713.Vt mbuf 714.Fa to 715will contain an identical copy of 716.Fa from->m_pkthdr 717and the per-packet attributes found in the 718.Vt mbuf chain 719.Fa from . 720The 721.Vt mbuf 722.Fa from 723must have the flag 724.Dv M_PKTHDR 725initially set, and 726.Fa to 727must be empty on entry. 
728.\" 729.It Fn m_move_pkthdr to from 730Move 731.Va m_pkthdr 732and the per-packet attributes from the 733.Vt mbuf chain 734.Fa from 735to the 736.Vt mbuf 737.Fa to . 738The 739.Vt mbuf 740.Fa from 741must have the flag 742.Dv M_PKTHDR 743initially set, and 744.Fa to 745must be empty on entry. 746Upon the function's completion, 747.Fa from 748will have the flag 749.Dv M_PKTHDR 750and the per-packet attributes cleared. 751.\" 752.It Fn m_fixhdr mbuf 753Set the packet-header length to the length of the 754.Vt mbuf chain . 755.\" 756.It Fn m_devget buf len offset ifp copy 757Copy data from a device local memory pointed to by 758.Fa buf 759to an 760.Vt mbuf chain . 761The copy is done using a specified copy routine 762.Fa copy , 763or 764.Fn bcopy 765if 766.Fa copy 767is 768.Dv NULL . 769.\" 770.It Fn m_cat m n 771Concatenate 772.Fa n 773to 774.Fa m . 775Both 776.Vt mbuf chains 777must be of the same type. 778.Fa N 779is still valid after the function returned. 780.Sy Note : 781It does not handle 782.Dv M_PKTHDR 783and friends. 784.\" 785.It Fn m_split mbuf len how 786Partition an 787.Vt mbuf chain 788in two pieces, returning the tail: 789all but the first 790.Fa len 791bytes. 792In case of failure, it returns 793.Dv NULL 794and attempts to restore the 795.Vt mbuf chain 796to its original state. 797.\" 798.It Fn m_apply mbuf off len f arg 799Apply a function to an 800.Vt mbuf chain , 801at offset 802.Fa off , 803for length 804.Fa len 805bytes. 806Typically used to avoid calls to 807.Fn m_pullup 808which would otherwise be unnecessary or undesirable. 809.Fa arg 810is a convenience argument which is passed to the callback function 811.Fa f . 812.Pp 813Each time 814.Fn f 815is called, it will be passed 816.Fa arg , 817a pointer to the 818.Fa data 819in the current mbuf, and the length 820.Fa len 821of the data in this mbuf to which the function should be applied. 
822.Pp 823The function should return zero to indicate success; 824otherwise, if an error is indicated, then 825.Fn m_apply 826will return the error and stop iterating through the 827.Vt mbuf chain . 828.\" 829.It Fn m_getptr mbuf loc off 830Return a pointer to the mbuf containing the data located at 831.Fa loc 832bytes from the beginning of the 833.Vt mbuf chain . 834The corresponding offset into the mbuf will be stored in 835.Fa *off . 836.It Fn m_defrag m0 how 837Defragment an mbuf chain, returning the shortest possible 838chain of mbufs and clusters. 839If allocation fails and this cannot be completed, 840.Dv NULL 841will be returned and the original chain will be unchanged. 842Upon success, the original chain will be freed and the new 843chain will be returned. 844.Fa how 845should be either 846.Dv M_TRYWAIT 847or 848.Dv M_DONTWAIT , 849depending on the caller's preference. 850.Pp 851This function is especially useful in network drivers, where 852certain long mbuf chains must be shortened before being added 853to TX descriptor lists. 854.El 855.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 856This section currently applies to TCP/IP only. 857In order to save the host CPU resources, computing checksums is 858offloaded to the network interface hardware if possible. 859The 860.Va m_pkthdr 861member of the leading 862.Vt mbuf 863of a packet contains two fields used for that purpose, 864.Vt int Va csum_flags 865and 866.Vt int Va csum_data . 867The meaning of those fields depends on the direction a packet flows in, 868and on whether the packet is fragmented. 869Henceforth, 870.Va csum_flags 871or 872.Va csum_data 873of a packet 874will denote the corresponding field of the 875.Va m_pkthdr 876member of the leading 877.Vt mbuf 878in the 879.Vt mbuf chain 880containing the packet. 881.Pp 882On output, checksum offloading is attempted after the outgoing 883interface has been determined for a packet. 
884The interface-specific field 885.Va ifnet.if_data.ifi_hwassist 886(see 887.Xr ifnet 9 ) 888is consulted for the capabilities of the interface to assist in 889computing checksums. 890The 891.Va csum_flags 892field of the packet header is set to indicate which actions the interface 893is supposed to perform on it. 894The actions unsupported by the network interface are done in the 895software prior to passing the packet down to the interface driver; 896such actions will never be requested through 897.Va csum_flags . 898.Pp 899The flags demanding a particular action from an interface are as follows: 900.Bl -tag -width ".Dv CSUM_TCP" -offset indent 901.It Dv CSUM_IP 902The IP header checksum is to be computed and stored in the 903corresponding field of the packet. 904The hardware is expected to know the format of an IP header 905to determine the offset of the IP checksum field. 906.It Dv CSUM_TCP 907The TCP checksum is to be computed. 908(See below.) 909.It Dv CSUM_UDP 910The UDP checksum is to be computed. 911(See below.) 912.El 913.Pp 914Should a TCP or UDP checksum be offloaded to the hardware, 915the field 916.Va csum_data 917will contain the byte offset of the checksum field relative to the 918end of the IP header. 919In this case, the checksum field will be initially 920set by the TCP/IP module to the checksum of the pseudo header 921defined by the TCP and UDP specifications. 922.Pp 923For outbound packets which have been fragmented 924by the host CPU, the following will also be true, 925regardless of the checksum flag settings: 926.Bl -bullet -offset indent 927.It 928all fragments will have the flag 929.Dv M_FRAG 930set in their 931.Va m_flags 932field; 933.It 934the first and the last fragments in the chain will have 935.Dv M_FIRSTFRAG 936or 937.Dv M_LASTFRAG 938set in their 939.Va m_flags , 940correspondingly; 941.It 942the first fragment in the chain will have the total number 943of fragments contained in its 944.Va csum_data 945field. 
946.El 947.Pp 948The last rule for fragmented packets takes precedence over the one 949for a TCP or UDP checksum. 950Nevertheless, offloading a TCP or UDP checksum is possible for a 951fragmented packet if the flag 952.Dv CSUM_IP_FRAGS 953is set in the field 954.Va ifnet.if_data.ifi_hwassist 955associated with the network interface. 956However, in this case the interface is expected to figure out 957the location of the checksum field within the sequence of fragments 958by itself because 959.Va csum_data 960contains a fragment count instead of a checksum offset value. 961.Pp 962On input, an interface indicates the actions it has performed 963on a packet by setting one or more of the following flags in 964.Va csum_flags 965associated with the packet: 966.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 967.It Dv CSUM_IP_CHECKED 968The IP header checksum has been computed. 969.It Dv CSUM_IP_VALID 970The IP header has a valid checksum. 971This flag can appear only in combination with 972.Dv CSUM_IP_CHECKED . 973.It Dv CSUM_DATA_VALID 974The checksum of the data portion of the IP packet has been computed 975and stored in the field 976.Va csum_data 977in network byte order. 978.It Dv CSUM_PSEUDO_HDR 979Can be set only along with 980.Dv CSUM_DATA_VALID 981to indicate that the IP data checksum found in 982.Va csum_data 983allows for the pseudo header defined by the TCP and UDP specifications. 984Otherwise the checksum of the pseudo header must be calculated by 985the host CPU and added to 986.Va csum_data 987to obtain the final checksum to be used for TCP or UDP validation purposes. 988.El 989.Pp 990If a particular network interface just indicates success or 991failure of TCP or UDP checksum validation without returning 992the exact value of the checksum to the host CPU, its driver can mark 993.Dv CSUM_DATA_VALID 994and 995.Dv CSUM_PSEUDO_HDR 996in 997.Va csum_flags , 998and set 999.Va csum_data 1000to 1001.Li 0xFFFF 1002hexadecimal to indicate a valid checksum. 
1003It is a peculiarity of the algorithm used that the Internet checksum 1004calculated over any valid packet will be 1005.Li 0xFFFF 1006as long as the original checksum field is included. 1007.Pp 1008For inbound packets which are IP fragments, all 1009.Va csum_data 1010fields will be summed during reassembly to obtain the final checksum 1011value passed to an upper layer in the 1012.Va csum_data 1013field of the reassembled packet. 1014The 1015.Va csum_flags 1016fields of all fragments will be consolidated using logical AND 1017to obtain the final value for 1018.Va csum_flags . 1019Thus, in order to successfully 1020offload checksum computation for fragmented data, 1021all fragments should have the same value of 1022.Va csum_flags . 1023.Sh STRESS TESTING 1024When running a kernel compiled with the option 1025.Dv MBUF_STRESS_TEST , 1026the following 1027.Xr sysctl 8 Ns 1028-controlled options may be used to create 1029various failure/extreme cases for testing of network drivers 1030and other parts of the kernel that rely on 1031.Vt mbufs . 1032.Bl -tag -width ident 1033.It Va net.inet.ip.mbuf_frag_size 1034Causes 1035.Fn ip_output 1036to fragment outgoing 1037.Vt mbuf chains 1038into fragments of the specified size. 1039Setting this variable to 1 is an excellent way to 1040test the long 1041.Vt mbuf chain 1042handling ability of network drivers. 1043.It Va kern.ipc.m_defragrandomfailures 1044Causes the function 1045.Fn m_defrag 1046to randomly fail, returning 1047.Dv NULL . 1048Any piece of code which uses 1049.Fn m_defrag 1050should be tested with this feature. 1051.El 1052.Sh RETURN VALUES 1053See above. 1054.Sh SEE ALSO 1055.Xr ifnet 9 , 1056.Xr mbuf_tags 9 1057.Sh HISTORY 1058.\" Please correct me if I'm wrong 1059.Vt Mbufs 1060appeared in an early version of 1061.Bx . 1062Besides being used for network packets, they were used 1063to store various dynamic structures, such as routing table 1064entries, interface addresses, protocol control blocks, etc. 
1065.Sh AUTHORS 1066The original 1067.Nm 1068man page was written by Yar Tikhiy. 1069