1.\" Copyright (c) 2000 FreeBSD Inc. 2.\" All rights reserved. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice, this list of conditions and the following disclaimer. 9.\" 2. Redistributions in binary form must reproduce the above copyright 10.\" notice, this list of conditions and the following disclaimer in the 11.\" documentation and/or other materials provided with the distribution. 12.\" 13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16.\" ARE DISCLAIMED. IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE 17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23.\" SUCH DAMAGE. 
24.\" 25.\" $FreeBSD$ 26.\" 27.Dd October 17, 2000 28.Dt MBUF 9 29.Os 30.\" 31.Sh NAME 32.Nm mbuf 33.Nd "memory management in the kernel IPC subsystem" 34.\" 35.Sh SYNOPSIS 36.In sys/param.h 37.In sys/systm.h 38.In sys/mbuf.h 39.\" 40.Ss Mbuf allocation macros 41.Fn MGET "struct mbuf *mbuf" "int how" "short type" 42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type" 43.Fn MCLGET "struct mbuf *mbuf" "int how" 44.Fo MEXTADD 45.Fa "struct mbuf *mbuf" 46.Fa "caddr_t buf" 47.Fa "u_int size" 48.Fa "void (*free)(void *opt_args)" 49.Fa "void *opt_args" 50.Fa "short flags" 51.Fa "int type" 52.Fc 53.Fn MEXTFREE "struct mbuf *mbuf" 54.Fn MEXT_ADD_REF "struct mbuf *mbuf" 55.Fn MEXT_REM_REF "struct mbuf *mbuf" 56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor" 57.\" 58.Ss Mbuf utility macros 59.Fn mtod "struct mbuf *mbuf" "type" 60.Ft int 61.Fn MEXT_IS_REF "struct mbuf *mbuf" 62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len" 63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len" 64.Ft int 65.Fn M_LEADINGSPACE "struct mbuf *mbuf" 66.Ft int 67.Fn M_TRAILINGSPACE "struct mbuf *mbuf" 68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from" 69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how" 70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type" 71.Ft int 72.Fn M_WRITABLE "struct mbuf *mbuf" 73.\" 74.Ss Mbuf allocation functions 75.Ft struct mbuf * 76.Fn m_get "int how" "int type" 77.Ft struct mbuf * 78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type" 79.Ft struct mbuf * 80.Fn m_getcl "int how" "short type" "int flags" 81.Ft struct mbuf * 82.Fn m_getclr "int how" "int type" 83.Ft struct mbuf * 84.Fn m_gethdr "int how" "int type" 85.Ft struct mbuf * 86.Fn m_free "struct mbuf *mbuf" 87.Ft void 88.Fn m_freem "struct mbuf *mbuf" 89.\" 90.Ss Mbuf utility functions 91.Ft void 92.Fn m_adj "struct mbuf *mbuf" "int len" 93.Ft struct mbuf * 94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how" 95.Ft struct mbuf * 96.Fn m_pullup "struct mbuf *mbuf" "int len" 97.Ft struct mbuf * 98.Fn 
m_copym "struct mbuf *mbuf" "int offset" "int len" "int how" 99.Ft struct mbuf * 100.Fn m_copypacket "struct mbuf *mbuf" "int how" 101.Ft struct mbuf * 102.Fn m_dup "struct mbuf *mbuf" "int how" 103.Ft void 104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 105.Ft void 106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf" 107.Ft struct mbuf * 108.Fo m_devget 109.Fa "char *buf" 110.Fa "int len" 111.Fa "int offset" 112.Fa "struct ifnet *ifp" 113.Fa "void (*copy)(char *from, caddr_t to, u_int len)" 114.Fc 115.Ft void 116.Fn m_cat "struct mbuf *m" "struct mbuf *n" 117.Ft u_int 118.Fn m_fixhdr "struct mbuf *mbuf" 119.Ft void 120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" 121.Ft void 122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from" 123.Ft u_int 124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last" 125.Ft struct mbuf * 126.Fn m_split "struct mbuf *mbuf" "int len" "int how" 127.Ft int 128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg" 129.Ft struct mbuf * 130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off" 131.Ft struct mbuf * 132.Fn m_defrag "struct mbuf *m0" "int how" 133.\" 134.Sh DESCRIPTION 135An 136.Vt mbuf 137is a basic unit of memory management in the kernel IPC subsystem. 138Network packets and socket buffers are stored in 139.Vt mbufs . 140A network packet may span multiple 141.Vt mbufs 142arranged into a 143.Vt mbuf chain 144(linked list), 145which allows adding or trimming 146network headers with little overhead. 147.Pp 148While a developer should not bother with 149.Vt mbuf 150internals without serious 151reason in order to avoid incompatibilities with future changes, it 152is useful to understand the general structure of an 153.Vt mbuf . 154.Pp 155An 156.Vt mbuf 157consists of a variable-sized header and a small internal 158buffer for data. 
159The total size of an 160.Vt mbuf , 161.Dv MSIZE , 162is a constant defined in 163.In sys/param.h . 164The 165.Vt mbuf 166header includes: 167.Pp 168.Bl -tag -width "m_nextpkt" -offset indent 169.It Va m_next 170.Pq Vt struct mbuf * 171A pointer to the next 172.Vt mbuf 173in the 174.Vt mbuf chain . 175.It Va m_nextpkt 176.Pq Vt struct mbuf * 177A pointer to the next 178.Vt mbuf chain 179in the queue. 180.It Va m_data 181.Pq Vt caddr_t 182A pointer to data attached to this 183.Vt mbuf . 184.It Va m_len 185.Pq Vt int 186The length of the data. 187.It Va m_type 188.Pq Vt short 189The type of the data. 190.It Va m_flags 191.Pq Vt int 192The 193.Vt mbuf 194flags. 195.El 196.Pp 197The 198.Vt mbuf 199flag bits are defined as follows: 200.Bd -literal 201/* mbuf flags */ 202#define M_EXT 0x0001 /* has associated external storage */ 203#define M_PKTHDR 0x0002 /* start of record */ 204#define M_EOR 0x0004 /* end of record */ 205#define M_RDONLY 0x0008 /* associated data marked read-only */ 206#define M_PROTO1 0x0010 /* protocol-specific */ 207#define M_PROTO2 0x0020 /* protocol-specific */ 208#define M_PROTO3 0x0040 /* protocol-specific */ 209#define M_PROTO4 0x0080 /* protocol-specific */ 210#define M_PROTO5 0x0100 /* protocol-specific */ 211#define M_PROTO6 0x4000 /* protocol-specific (avoid M_BCAST conflict) */ 212#define M_FREELIST 0x8000 /* mbuf is on the free list */ 213 214/* mbuf pkthdr flags (also stored in m_flags) */ 215#define M_BCAST 0x0200 /* send/received as link-level broadcast */ 216#define M_MCAST 0x0400 /* send/received as link-level multicast */ 217#define M_FRAG 0x0800 /* packet is fragment of larger packet */ 218#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ 219#define M_LASTFRAG 0x2000 /* packet is last fragment */ 220.Ed 221.Pp 222The available 223.Vt mbuf 224types are defined as follows: 225.Bd -literal 226/* mbuf types */ 227#define MT_DATA 1 /* dynamic (data) allocation */ 228#define MT_HEADER 2 /* packet header */ 229#define MT_SONAME 
8 /* socket name */ 230#define MT_FTABLE 11 /* fragment reassembly header */ 231#define MT_TAG 13 /* volatile metadata associated to pkts */ 232#define MT_CONTROL 14 /* extra-data protocol message */ 233#define MT_OOBDATA 15 /* expedited data */ 234.Ed 235.Pp 236If the 237.Dv M_PKTHDR 238flag is set, a 239.Vt struct pkthdr Va m_pkthdr 240is added to the 241.Vt mbuf 242header. 243It contains a pointer to the interface 244the packet has been received from 245.Pq Vt struct ifnet Va *rcvif , 246and the total packet length 247.Pq Vt int Va len . 248Fields used in offloading checksum calculation to the hardware are kept in 249.Va m_pkthdr 250as well. 251See 252.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION 253for details. 254.Pp 255If small enough, data is stored in the internal data buffer of an 256.Vt mbuf . 257If the data is sufficiently large, another 258.Vt mbuf 259may be added to the 260.Vt mbuf chain , 261or external storage may be associated with the 262.Vt mbuf . 263.Dv MHLEN 264bytes of data can fit into an 265.Vt mbuf 266with the 267.Dv M_PKTHDR 268flag set, 269.Dv MLEN 270bytes can otherwise. 271.Pp 272If external storage is being associated with an 273.Vt mbuf , 274the 275.Va m_ext 276header is added at the cost of losing the internal data buffer. 277It includes a pointer to external storage, the size of the storage, 278a pointer to a function used for freeing the storage, 279a pointer to an optional argument that can be passed to the function, 280and a pointer to a reference counter. 281An 282.Vt mbuf 283using external storage has the 284.Dv M_EXT 285flag set. 286.Pp 287The system supplies a macro for allocating the desired external storage 288buffer, 289.Dv MEXTADD . 290.Pp 291The allocation and management of the reference counter is handled by the 292subsystem. 293The developer can check whether the reference count for the 294external storage of a given 295.Vt mbuf 296is greater than 1 with the 297.Dv MEXT_IS_REF 298macro. 
299Similarly, the developer can directly add and remove references, 300if absolutely necessary, with the use of the 301.Dv MEXT_ADD_REF 302and 303.Dv MEXT_REM_REF 304macros. 305.Pp 306The system also supplies a default type of external storage buffer called an 307.Vt mbuf cluster . 308.Vt Mbuf clusters 309can be allocated and configured with the use of the 310.Dv MCLGET 311macro. 312Each 313.Vt mbuf cluster 314is 315.Dv MCLBYTES 316in size, where MCLBYTES is a machine-dependent constant. 317The system defines an advisory macro 318.Dv MINCLSIZE , 319which is the smallest amount of data to put into an 320.Vt mbuf cluster . 321It's equal to the sum of 322.Dv MLEN 323and 324.Dv MHLEN . 325It is typically preferable to store data into the data region of an 326.Vt mbuf , 327if size permits, as opposed to allocating a separate 328.Vt mbuf cluster 329to hold the same data. 330.\" 331.Ss Macros and Functions 332There are numerous predefined macros and functions that provide the 333developer with common utilities. 334.\" 335.Bl -ohang -offset indent 336.It Fn mtod mbuf type 337Convert an 338.Fa mbuf 339pointer to a data pointer. 340The macro expands to the data pointer cast to the pointer of the specified 341.Fa type . 342.Sy Note : 343It is advisable to ensure that there is enough contiguous data in 344.Fa mbuf . 345See 346.Fn m_pullup 347for details. 348.It Fn MGET mbuf how type 349Allocate an 350.Vt mbuf 351and initialize it to contain internal data. 352.Fa mbuf 353will point to the allocated 354.Vt mbuf 355on success, or be set to 356.Dv NULL 357on failure. 358The 359.Fa how 360argument is to be set to 361.Dv M_TRYWAIT 362or 363.Dv M_DONTWAIT . 364It specifies whether the caller is willing to block if necessary. 365If 366.Fa how 367is set to 368.Dv M_TRYWAIT , 369a failed allocation will result in the caller being put 370to sleep for a designated 371kern.ipc.mbuf_wait 372.Xr ( sysctl 8 373tunable) 374number of ticks. 
375A number of other functions and macros related to 376.Vt mbufs 377have the same argument because they may 378at some point need to allocate new 379.Vt mbufs . 380.Pp 381Programmers should be careful not to confuse the 382.Vt mbuf 383allocation flag 384.Dv M_DONTWAIT 385with the 386.Xr malloc 9 387allocation flag, 388.Dv M_NOWAIT . 389They are not the same. 390.It Fn MGETHDR mbuf how type 391Allocate an 392.Vt mbuf 393and initialize it to contain a packet header 394and internal data. 395See 396.Fn MGET 397for details. 398.It Fn MCLGET mbuf how 399Allocate and attach an 400.Vt mbuf cluster 401to 402.Fa mbuf . 403If the macro fails, the 404.Dv M_EXT 405flag won't be set in 406.Fa mbuf . 407.It Fn M_ALIGN mbuf len 408Set the pointer 409.Fa mbuf->m_data 410to place an object of the size 411.Fa len 412at the end of the internal data area of 413.Fa mbuf , 414long word aligned. 415Applicable only if 416.Fa mbuf 417is newly allocated with 418.Fn MGET 419or 420.Fn m_get . 421.It Fn MH_ALIGN mbuf len 422Serves the same purpose as 423.Fn M_ALIGN 424does, but only for 425.Fa mbuf 426newly allocated with 427.Fn MGETHDR 428or 429.Fn m_gethdr , 430or initialized by 431.Fn m_dup_pkthdr 432or 433.Fn m_move_pkthdr . 434.It Fn M_LEADINGSPACE mbuf 435Returns the number of bytes available before the beginning 436of data in 437.Fa mbuf . 438.It Fn M_TRAILINGSPACE mbuf 439Returns the number of bytes available after the end of data in 440.Fa mbuf . 441.It Fn M_PREPEND mbuf len how 442This macro operates on an 443.Vt mbuf chain . 444It is an optimized wrapper for 445.Fn m_prepend 446that can make use of possible empty space before data 447(e.g. left after trimming of a link-layer header). 448The new 449.Vt mbuf chain 450pointer or 451.Dv NULL 452is in 453.Fa mbuf 454after the call. 455.It Fn M_MOVE_PKTHDR to from 456Using this macro is equivalent to calling 457.Fn m_move_pkthdr to from . 
458.It Fn M_WRITABLE mbuf 459This macro will evaluate true if 460.Fa mbuf 461is not marked 462.Dv M_RDONLY 463and if either 464.Fa mbuf 465does not contain external storage or, 466if it does, 467then if the reference count of the storage is not greater than 1. 468The 469.Dv M_RDONLY 470flag can be set in 471.Fa mbuf->m_flags . 472This can be achieved during setup of the external storage, 473by passing the 474.Dv M_RDONLY 475bit as a 476.Fa flags 477argument to the 478.Fn MEXTADD 479macro, or can be directly set in individual 480.Vt mbufs . 481.It Fn MCHTYPE mbuf type 482Change the type of 483.Fa mbuf 484to 485.Fa type . 486This is a relatively expensive operation and should be avoided. 487.El 488.Pp 489The functions are: 490.Bl -ohang -offset indent 491.It Fn m_get how type 492A function version of 493.Fn MGET 494for non-critical paths. 495.It Fn m_getm orig len how type 496Allocate 497.Fa len 498bytes worth of 499.Vt mbufs 500and 501.Vt mbuf clusters 502if necessary and append the resulting allocated 503.Vt mbuf chain 504to the 505.Vt mbuf chain 506.Fa orig , 507if it is 508.No non- Ns Dv NULL . 509If the allocation fails at any point, 510free whatever was allocated and return 511.Dv NULL . 512If 513.Fa orig 514is 515.No non- Ns Dv NULL , 516it will not be freed. 517It is possible to use 518.Fn m_getm 519to either append 520.Fa len 521bytes to an existing 522.Vt mbuf 523or 524.Vt mbuf chain 525(for example, one which may be sitting in a pre-allocated ring) 526or to simply perform an all-or-nothing 527.Vt mbuf 528and 529.Vt mbuf cluster 530allocation. 531.It Fn m_gethdr how type 532A function version of 533.Fn MGETHDR 534for non-critical paths. 535.It Fn m_getcl how type flags 536Fetch an 537.Vt mbuf 538with a 539.Vt mbuf cluster 540attached to it. 541If one of the allocations fails, the entire allocation fails. 
542This routine is the preferred way of fetching both the 543.Vt mbuf 544and 545.Vt mbuf cluster 546together, as it avoids having to unlock/relock between allocations. 547Returns 548.Dv NULL 549on failure. 550.It Fn m_getclr how type 551Allocate an 552.Vt mbuf 553and zero out the data region. 554.It Fn m_free mbuf 555Frees 556.Vt mbuf . 557.El 558.Pp 559The functions below operate on 560.Vt mbuf chains . 561.Bl -ohang -offset indent 562.It Fn m_freem mbuf 563Free an entire 564.Vt mbuf chain , 565including any external storage. 566.\" 567.It Fn m_adj mbuf len 568Trim 569.Fa len 570bytes from the head of an 571.Vt mbuf chain 572if 573.Fa len 574is positive, from the tail otherwise. 575.\" 576.It Fn m_prepend mbuf len how 577Allocate a new 578.Vt mbuf 579and prepend it to the 580.Vt mbuf chain , 581handling 582.Dv M_PKTHDR 583properly. 584.Sy Note : 585It doesn't allocate any 586.Vt mbuf clusters , 587so 588.Fa len 589must be less than 590.Dv MLEN 591or 592.Dv MHLEN , 593depending on the 594.Dv M_PKTHDR 595flag setting. 596.\" 597.It Fn m_pullup mbuf len 598Arrange that the first 599.Fa len 600bytes of an 601.Vt mbuf chain 602are contiguous and lie in the data area of 603.Fa mbuf , 604so they are accessible with 605.Fn mtod mbuf type . 606Return the new 607.Vt mbuf chain 608on success, 609.Dv NULL 610on failure 611(the 612.Vt mbuf chain 613is freed in this case). 614.Sy Note : 615It doesn't allocate any 616.Vt mbuf clusters , 617so 618.Fa len 619must be less than 620.Dv MHLEN . 621.\" 622.It Fn m_copym mbuf offset len how 623Make a copy of an 624.Vt mbuf chain 625starting 626.Fa offset 627bytes from the beginning, continuing for 628.Fa len 629bytes. 630If 631.Fa len 632is 633.Dv M_COPYALL , 634copy to the end of the 635.Vt mbuf chain . 636.Sy Note : 637The copy is read-only, because the 638.Vt mbuf clusters 639are not copied, only their reference counts are incremented. 
640.\" 641.It Fn m_copypacket mbuf how 642Copy an entire packet including header, which must be present. 643This is an optimized version of the common case 644.Fn m_copym mbuf 0 M_COPYALL how . 645.Sy Note : 646The copy is read-only, because the 647.Vt mbuf clusters 648are not copied, only their reference counts are incremented. 649.\" 650.It Fn m_dup mbuf how 651Copy a packet header 652.Vt mbuf chain 653into a completely new 654.Vt mbuf chain , 655including copying any 656.Vt mbuf clusters . 657Use this instead of 658.Fn m_copypacket 659when you need a writable copy of an 660.Vt mbuf chain . 661.\" 662.It Fn m_copydata mbuf offset len buf 663Copy data from an 664.Vt mbuf chain 665starting 666.Fa offset 667bytes from the beginning, continuing for 668.Fa len 669bytes, into the indicated buffer 670.Fa buf . 671.\" 672.It Fn m_copyback mbuf offset len buf 673Copy 674.Fa len 675bytes from the buffer 676.Fa buf 677back into the indicated 678.Vt mbuf chain , 679starting at 680.Fa offset 681bytes from the beginning of the 682.Vt mbuf chain , 683extending the 684.Vt mbuf chain 685if necessary. 686.Sy Note : 687It doesn't allocate any 688.Vt mbuf clusters , 689just adds 690.Vt mbufs 691to the 692.Vt mbuf chain . 693It's safe to set 694.Fa offset 695beyond the current 696.Vt mbuf chain 697end: zeroed 698.Vt mbufs 699will be allocated to fill the space. 700.\" 701.It Fn m_length mbuf last 702Return the length of the 703.Vt mbuf chain , 704and optionally a pointer to the last 705.Vt mbuf . 706.\" 707.It Fn m_dup_pkthdr to from how 708Upon the function's completion, the 709.Vt mbuf 710.Fa to 711will contain an identical copy of 712.Fa from->m_pkthdr 713and the per-packet attributes found in the 714.Vt mbuf chain 715.Fa from . 716The 717.Vt mbuf 718.Fa from 719must have the flag 720.Dv M_PKTHDR 721initially set, and 722.Fa to 723must be empty on entry. 
724.\" 725.It Fn m_move_pkthdr to from 726Move 727.Va m_pkthdr 728and the per-packet attributes from the 729.Vt mbuf chain 730.Fa from 731to the 732.Vt mbuf 733.Fa to . 734The 735.Vt mbuf 736.Fa from 737must have the flag 738.Dv M_PKTHDR 739initially set, and 740.Fa to 741must be empty on entry. 742Upon the function's completion, 743.Fa from 744will have the flag 745.Dv M_PKTHDR 746and the per-packet attributes cleared. 747.\" 748.It Fn m_fixhdr mbuf 749Set the packet-header length to the length of the 750.Vt mbuf chain . 751.\" 752.It Fn m_devget buf len offset ifp copy 753Copy data from a device local memory pointed to by 754.Fa buf 755to an 756.Vt mbuf chain . 757The copy is done using a specified copy routine 758.Fa copy , 759or 760.Fn bcopy 761if 762.Fa copy 763is 764.Dv NULL . 765.\" 766.It Fn m_cat m n 767Concatenate 768.Fa n 769to 770.Fa m . 771Both 772.Vt mbuf chains 773must be of the same type. 774.Fa n 775is still valid after the function returns. 776.Sy Note : 777It does not handle 778.Dv M_PKTHDR 779and friends. 780.\" 781.It Fn m_split mbuf len how 782Partition an 783.Vt mbuf chain 784in two pieces, returning the tail: 785all but the first 786.Fa len 787bytes. 788In case of failure, it returns 789.Dv NULL 790and attempts to restore the 791.Vt mbuf chain 792to its original state. 793.\" 794.It Fn m_apply mbuf off len f arg 795Apply a function to an 796.Vt mbuf chain , 797at offset 798.Fa off , 799for length 800.Fa len 801bytes. 802Typically used to avoid calls to 803.Fn m_pullup 804which would otherwise be unnecessary or undesirable. 805.Fa arg 806is a convenience argument which is passed to the callback function 807.Fa f . 808.Pp 809Each time 810.Fn f 811is called, it will be passed 812.Fa arg , 813a pointer to the 814.Fa data 815in the current mbuf, and the length 816.Fa len 817of the data in this mbuf to which the function should be applied. 
818.Pp 819The function should return zero to indicate success; 820otherwise, if an error is indicated, then 821.Fn m_apply 822will return the error and stop iterating through the 823.Vt mbuf chain . 824.\" 825.It Fn m_getptr mbuf loc off 826Return a pointer to the mbuf containing the data located at 827.Fa loc 828bytes from the beginning of the 829.Vt mbuf chain . 830The corresponding offset into the mbuf will be stored in 831.Fa *off . 832.It Fn m_defrag m0 how 833Defragment an mbuf chain, returning the shortest possible 834chain of mbufs and clusters. 835If allocation fails and this can not be completed, 836.Dv NULL 837will be returned and the original chain will be unchanged. 838Upon success, the original chain will be freed and the new 839chain will be returned. 840.Fa how 841should be either 842.Dv M_TRYWAIT 843or 844.Dv M_DONTWAIT , 845depending on the caller's preference. 846.Pp 847This function is especially useful in network drivers, where 848certain long mbuf chains must be shortened before being added 849to TX descriptor lists. 850.El 851.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION 852This section currently applies to TCP/IP only. 853In order to save the host CPU resources, computing checksums is 854offloaded to the network interface hardware if possible. 855The 856.Va m_pkthdr 857member of the leading 858.Vt mbuf 859of a packet contains two fields used for that purpose, 860.Vt int Va csum_flags 861and 862.Vt int Va csum_data . 863The meaning of those fields depends on the direction a packet flows in, 864and on whether the packet is fragmented. 865Henceforth, 866.Va csum_flags 867or 868.Va csum_data 869of a packet 870will denote the corresponding field of the 871.Va m_pkthdr 872member of the leading 873.Vt mbuf 874in the 875.Vt mbuf chain 876containing the packet. 877.Pp 878On output, checksum offloading is attempted after the outgoing 879interface has been determined for a packet. 
880The interface-specific field 881.Va ifnet.if_data.ifi_hwassist 882(see 883.Xr ifnet 9 ) 884is consulted for the capabilities of the interface to assist in 885computing checksums. 886The 887.Va csum_flags 888field of the packet header is set to indicate which actions the interface 889is supposed to perform on it. 890The actions unsupported by the network interface are done in the 891software prior to passing the packet down to the interface driver; 892such actions will never be requested through 893.Va csum_flags . 894.Pp 895The flags demanding a particular action from an interface are as follows: 896.Bl -tag -width ".Dv CSUM_TCP" -offset indent 897.It Dv CSUM_IP 898The IP header checksum is to be computed and stored in the 899corresponding field of the packet. 900The hardware is expected to know the format of an IP header 901to determine the offset of the IP checksum field. 902.It Dv CSUM_TCP 903The TCP checksum is to be computed. 904(See below.) 905.It Dv CSUM_UDP 906The UDP checksum is to be computed. 907(See below.) 908.El 909.Pp 910Should a TCP or UDP checksum be offloaded to the hardware, 911the field 912.Va csum_data 913will contain the byte offset of the checksum field relative to the 914end of the IP header. 915In this case, the checksum field will be initially 916set by the TCP/IP module to the checksum of the pseudo header 917defined by the TCP and UDP specifications. 918.Pp 919For outbound packets which have been fragmented 920by the host CPU, the following will also be true, 921regardless of the checksum flag settings: 922.Bl -bullet -offset indent 923.It 924all fragments will have the flag 925.Dv M_FRAG 926set in their 927.Va m_flags 928field; 929.It 930the first and the last fragments in the chain will have 931.Dv M_FIRSTFRAG 932or 933.Dv M_LASTFRAG 934set in their 935.Va m_flags , 936correspondingly; 937.It 938the first fragment in the chain will have the total number 939of fragments contained in its 940.Va csum_data 941field. 
942.El 943.Pp 944The last rule for fragmented packets takes precedence over the one 945for a TCP or UDP checksum. 946Nevertheless, offloading a TCP or UDP checksum is possible for a 947fragmented packet if the flag 948.Dv CSUM_IP_FRAGS 949is set in the field 950.Va ifnet.if_data.ifi_hwassist 951associated with the network interface. 952However, in this case the interface is expected to figure out 953the location of the checksum field within the sequence of fragments 954by itself because 955.Va csum_data 956contains a fragment count instead of a checksum offset value. 957.Pp 958On input, an interface indicates the actions it has performed 959on a packet by setting one or more of the following flags in 960.Va csum_flags 961associated with the packet: 962.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent 963.It Dv CSUM_IP_CHECKED 964The IP header checksum has been computed. 965.It Dv CSUM_IP_VALID 966The IP header has a valid checksum. 967This flag can appear only in combination with 968.Dv CSUM_IP_CHECKED . 969.It Dv CSUM_DATA_VALID 970The checksum of the data portion of the IP packet has been computed 971and stored in the field 972.Va csum_data 973in network byte order. 974.It Dv CSUM_PSEUDO_HDR 975Can be set only along with 976.Dv CSUM_DATA_VALID 977to indicate that the IP data checksum found in 978.Va csum_data 979allows for the pseudo header defined by the TCP and UDP specifications. 980Otherwise the checksum of the pseudo header must be calculated by 981the host CPU and added to 982.Va csum_data 983to obtain the final checksum to be used for TCP or UDP validation purposes. 984.El 985.Pp 986If a particular network interface just indicates success or 987failure of TCP or UDP checksum validation without returning 988the exact value of the checksum to the host CPU, its driver can mark 989.Dv CSUM_DATA_VALID 990and 991.Dv CSUM_PSEUDO_HDR 992in 993.Va csum_flags , 994and set 995.Va csum_data 996to 997.Li 0xFFFF 998hexadecimal to indicate a valid checksum. 
999It is a peculiarity of the algorithm used that the Internet checksum 1000calculated over any valid packet will be 1001.Li 0xFFFF 1002as long as the original checksum field is included. 1003.Pp 1004For inbound packets which are IP fragments, all 1005.Va csum_data 1006fields will be summed during reassembly to obtain the final checksum 1007value passed to an upper layer in the 1008.Va csum_data 1009field of the reassembled packet. 1010The 1011.Va csum_flags 1012fields of all fragments will be consolidated using logical AND 1013to obtain the final value for 1014.Va csum_flags . 1015Thus, in order to successfully 1016offload checksum computation for fragmented data, 1017all fragments should have the same value of 1018.Va csum_flags . 1019.Sh STRESS TESTING 1020When running a kernel compiled with the option 1021.Dv MBUF_STRESS_TEST , 1022the following 1023.Xr sysctl 8 Ns 1024-controlled options may be used to create 1025various failure/extreme cases for testing of network drivers 1026and other parts of the kernel that rely on 1027.Vt mbufs . 1028.Bl -tag -width indent 1029.It Va net.inet.ip.mbuf_frag_size 1030Causes 1031.Fn ip_output 1032to fragment outgoing 1033.Vt mbuf chains 1034into fragments of the specified size. 1035Setting this variable to 1 is an excellent way to 1036test the long 1037.Vt mbuf chain 1038handling ability of network drivers. 1039.It Va kern.ipc.m_defragrandomfailures 1040Causes the function 1041.Fn m_defrag 1042to randomly fail, returning 1043.Dv NULL . 1044Any piece of code which uses 1045.Fn m_defrag 1046should be tested with this feature. 1047.El 1048.Sh RETURN VALUES 1049See above. 1050.Sh SEE ALSO 1051.Xr ifnet 9 1052.Sh HISTORY 1053.\" Please correct me if I'm wrong 1054.Vt Mbufs 1055appeared in an early version of 1056.Bx . 1057Besides being used for network packets, they were used 1058to store various dynamic structures, such as routing table 1059entries, interface addresses, protocol control blocks, etc. 
1060.Sh AUTHORS 1061The original 1062.Nm 1063man page was written by Yar Tikhiy. 1064