xref: /freebsd/share/man/man9/mbuf.9 (revision 7562eaabc01a48e6b11d5b558c41e3b92dae5c2d)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd August 27, 2004
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_pullup "struct mbuf *mbuf" "int len"
97.Ft struct mbuf *
98.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
99.Ft struct mbuf *
100.Fn m_copypacket "struct mbuf *mbuf" "int how"
101.Ft struct mbuf *
102.Fn m_dup "struct mbuf *mbuf" "int how"
103.Ft void
104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
105.Ft void
106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft struct mbuf *
108.Fo m_devget
109.Fa "char *buf"
110.Fa "int len"
111.Fa "int offset"
112.Fa "struct ifnet *ifp"
113.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
114.Fc
115.Ft void
116.Fn m_cat "struct mbuf *m" "struct mbuf *n"
117.Ft u_int
118.Fn m_fixhdr "struct mbuf *mbuf"
119.Ft int
120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
121.Ft void
122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
123.Ft u_int
124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
125.Ft struct mbuf *
126.Fn m_split "struct mbuf *mbuf" "int len" "int how"
127.Ft int
128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
129.Ft struct mbuf *
130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
131.Ft struct mbuf *
132.Fn m_defrag "struct mbuf *m0" "int how"
133.\"
134.Sh DESCRIPTION
135An
136.Vt mbuf
137is a basic unit of memory management in the kernel IPC subsystem.
138Network packets and socket buffers are stored in
139.Vt mbufs .
140A network packet may span multiple
141.Vt mbufs
142arranged into a
143.Vt mbuf chain
144(linked list),
145which allows adding or trimming
146network headers with little overhead.
147.Pp
148While a developer should not bother with
149.Vt mbuf
150internals without serious
151reason in order to avoid incompatibilities with future changes, it
152is useful to understand the general structure of an
153.Vt mbuf .
154.Pp
155An
156.Vt mbuf
157consists of a variable-sized header and a small internal
158buffer for data.
159The total size of an
160.Vt mbuf ,
161.Dv MSIZE ,
162is a constant defined in
163.In sys/param.h .
164The
165.Vt mbuf
166header includes:
167.Pp
168.Bl -tag -width "m_nextpkt" -offset indent
169.It Va m_next
170.Pq Vt struct mbuf *
171A pointer to the next
172.Vt mbuf
173in the
174.Vt mbuf chain .
175.It Va m_nextpkt
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf chain
179in the queue.
180.It Va m_data
181.Pq Vt caddr_t
182A pointer to data attached to this
183.Vt mbuf .
184.It Va m_len
185.Pq Vt int
186The length of the data.
187.It Va m_type
188.Pq Vt short
189The type of the data.
190.It Va m_flags
191.Pq Vt int
192The
193.Vt mbuf
194flags.
195.El
196.Pp
197The
198.Vt mbuf
199flag bits are defined as follows:
200.Bd -literal
201/* mbuf flags */
202#define	M_EXT		0x0001	/* has associated external storage */
203#define	M_PKTHDR	0x0002	/* start of record */
204#define	M_EOR		0x0004	/* end of record */
205#define	M_RDONLY	0x0008	/* associated data marked read-only */
206#define	M_PROTO1	0x0010	/* protocol-specific */
207#define	M_PROTO2	0x0020 	/* protocol-specific */
208#define	M_PROTO3	0x0040	/* protocol-specific */
209#define	M_PROTO4	0x0080	/* protocol-specific */
210#define	M_PROTO5	0x0100	/* protocol-specific */
211#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
212#define	M_FREELIST	0x8000	/* mbuf is on the free list */
213
214/* mbuf pkthdr flags (also stored in m_flags) */
215#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
216#define	M_MCAST		0x0400	/* send/received as link-level multicast */
217#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
218#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
219#define	M_LASTFRAG	0x2000	/* packet is last fragment */
220.Ed
221.Pp
222The available
223.Vt mbuf
224types are defined as follows:
225.Bd -literal
226/* mbuf types */
227#define	MT_DATA		1	/* dynamic (data) allocation */
228#define	MT_HEADER	2	/* packet header */
229#define	MT_SONAME	8	/* socket name */
230#define	MT_FTABLE	11	/* fragment reassembly header */
231#define	MT_CONTROL	14	/* extra-data protocol message */
232#define	MT_OOBDATA	15	/* expedited data */
233.Ed
234.Pp
235If the
236.Dv M_PKTHDR
237flag is set, a
238.Vt struct pkthdr Va m_pkthdr
239is added to the
240.Vt mbuf
241header.
242It contains a pointer to the interface
243the packet has been received from
244.Pq Vt struct ifnet Va *rcvif ,
245and the total packet length
246.Pq Vt int Va len .
247Optionally, it may also contain an attached list of packet tags
248.Pq Vt "struct m_tag" .
249See
250.Xr mbuf_tags 9
251for details.
252Fields used in offloading checksum calculation to the hardware are kept in
253.Va m_pkthdr
254as well.
255See
256.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
257for details.
258.Pp
259If small enough, data is stored in the internal data buffer of an
260.Vt mbuf .
261If the data is sufficiently large, another
262.Vt mbuf
263may be added to the
264.Vt mbuf chain ,
265or external storage may be associated with the
266.Vt mbuf .
267.Dv MHLEN
268bytes of data can fit into an
269.Vt mbuf
270with the
271.Dv M_PKTHDR
272flag set,
273.Dv MLEN
274bytes can otherwise.
275.Pp
276If external storage is being associated with an
277.Vt mbuf ,
278the
279.Va m_ext
280header is added at the cost of losing the internal data buffer.
281It includes a pointer to external storage, the size of the storage,
282a pointer to a function used for freeing the storage,
283a pointer to an optional argument that can be passed to the function,
284and a pointer to a reference counter.
285An
286.Vt mbuf
287using external storage has the
288.Dv M_EXT
289flag set.
290.Pp
291The system supplies a macro for allocating the desired external storage
292buffer,
293.Dv MEXTADD .
294.Pp
295The allocation and management of the reference counter is handled by the
296subsystem.
297The developer can check whether the reference count for the
298external storage of a given
299.Vt mbuf
300is greater than 1 with the
301.Dv MEXT_IS_REF
302macro.
303Similarly, the developer can directly add and remove references,
304if absolutely necessary, with the use of the
305.Dv MEXT_ADD_REF
306and
307.Dv MEXT_REM_REF
308macros.
309.Pp
310The system also supplies a default type of external storage buffer called an
311.Vt mbuf cluster .
312.Vt Mbuf clusters
313can be allocated and configured with the use of the
314.Dv MCLGET
315macro.
316Each
317.Vt mbuf cluster
318is
319.Dv MCLBYTES
320in size, where MCLBYTES is a machine-dependent constant.
321The system defines an advisory macro
322.Dv MINCLSIZE ,
323which is the smallest amount of data to put into an
324.Vt mbuf cluster .
325It's equal to the sum of
326.Dv MLEN
327and
328.Dv MHLEN .
329It is typically preferable to store data into the data region of an
330.Vt mbuf ,
331if size permits, as opposed to allocating a separate
332.Vt mbuf cluster
333to hold the same data.
334.\"
335.Ss Macros and Functions
336There are numerous predefined macros and functions that provide the
337developer with common utilities.
338.\"
339.Bl -ohang -offset indent
340.It Fn mtod mbuf type
341Convert an
342.Fa mbuf
343pointer to a data pointer.
344The macro expands to the data pointer cast to the pointer of the specified
345.Fa type .
346.Sy Note :
347It is advisable to ensure that there is enough contiguous data in
348.Fa mbuf .
349See
350.Fn m_pullup
351for details.
352.It Fn MGET mbuf how type
353Allocate an
354.Vt mbuf
355and initialize it to contain internal data.
356.Fa mbuf
357will point to the allocated
358.Vt mbuf
359on success, or be set to
360.Dv NULL
361on failure.
362The
363.Fa how
364argument is to be set to
365.Dv M_TRYWAIT
366or
367.Dv M_DONTWAIT .
368It specifies whether the caller is willing to block if necessary.
369If
370.Fa how
371is set to
372.Dv M_TRYWAIT ,
373a failed allocation will result in the caller being put
374to sleep for a designated
375kern.ipc.mbuf_wait
376.Xr ( sysctl 8
377tunable)
378number of ticks.
379A number of other functions and macros related to
380.Vt mbufs
381have the same argument because they may
382at some point need to allocate new
383.Vt mbufs .
384.Pp
385Programmers should be careful not to confuse the
386.Vt mbuf
387allocation flag
388.Dv M_DONTWAIT
389with the
390.Xr malloc 9
391allocation flag,
392.Dv M_NOWAIT .
393They are not the same.
394.It Fn MGETHDR mbuf how type
395Allocate an
396.Vt mbuf
397and initialize it to contain a packet header
398and internal data.
399See
400.Fn MGET
401for details.
402.It Fn MCLGET mbuf how
403Allocate and attach an
404.Vt mbuf cluster
405to
406.Fa mbuf .
407If the macro fails, the
408.Dv M_EXT
409flag won't be set in
410.Fa mbuf .
411.It Fn M_ALIGN mbuf len
412Set the pointer
413.Fa mbuf->m_data
414to place an object of the size
415.Fa len
416at the end of the internal data area of
417.Fa mbuf ,
418long word aligned.
419Applicable only if
420.Fa mbuf
421is newly allocated with
422.Fn MGET
423or
424.Fn m_get .
425.It Fn MH_ALIGN mbuf len
426Serves the same purpose as
427.Fn M_ALIGN
428does, but only for
429.Fa mbuf
430newly allocated with
431.Fn MGETHDR
432or
433.Fn m_gethdr ,
434or initialized by
435.Fn m_dup_pkthdr
436or
437.Fn m_move_pkthdr .
438.It Fn M_LEADINGSPACE mbuf
439Returns the number of bytes available before the beginning
440of data in
441.Fa mbuf .
442.It Fn M_TRAILINGSPACE mbuf
443Returns the number of bytes available after the end of data in
444.Fa mbuf .
445.It Fn M_PREPEND mbuf len how
446This macro operates on an
447.Vt mbuf chain .
448It is an optimized wrapper for
449.Fn m_prepend
450that can make use of possible empty space before data
451(e.g.\& left after trimming of a link-layer header).
452The new
453.Vt mbuf chain
454pointer or
455.Dv NULL
456is in
457.Fa mbuf
458after the call.
459.It Fn M_MOVE_PKTHDR to from
460Using this macro is equivalent to calling
461.Fn m_move_pkthdr to from .
462.It Fn M_WRITABLE mbuf
463This macro will evaluate true if
464.Fa mbuf
465is not marked
466.Dv M_RDONLY
467and if either
468.Fa mbuf
469does not contain external storage or,
470if it does,
471then if the reference count of the storage is not greater than 1.
472The
473.Dv M_RDONLY
474flag can be set in
475.Fa mbuf->m_flags .
476This can be achieved during setup of the external storage,
477by passing the
478.Dv M_RDONLY
479bit as a
480.Fa flags
481argument to the
482.Fn MEXTADD
483macro, or can be directly set in individual
484.Vt mbufs .
485.It Fn MCHTYPE mbuf type
486Change the type of
487.Fa mbuf
488to
489.Fa type .
490This is a relatively expensive operation and should be avoided.
491.El
492.Pp
493The functions are:
494.Bl -ohang -offset indent
495.It Fn m_get how type
496A function version of
497.Fn MGET
498for non-critical paths.
499.It Fn m_getm orig len how type
500Allocate
501.Fa len
502bytes worth of
503.Vt mbufs
504and
505.Vt mbuf clusters
506if necessary and append the resulting allocated
507.Vt mbuf chain
508to the
509.Vt mbuf chain
510.Fa orig ,
511if it is
512.No non- Ns Dv NULL .
513If the allocation fails at any point,
514free whatever was allocated and return
515.Dv NULL .
516If
517.Fa orig
518is
519.No non- Ns Dv NULL ,
520it will not be freed.
521It is possible to use
522.Fn m_getm
523to either append
524.Fa len
525bytes to an existing
526.Vt mbuf
527or
528.Vt mbuf chain
529(for example, one which may be sitting in a pre-allocated ring)
530or to simply perform an all-or-nothing
531.Vt mbuf
532and
533.Vt mbuf cluster
534allocation.
535.It Fn m_gethdr how type
536A function version of
537.Fn MGETHDR
538for non-critical paths.
539.It Fn m_getcl how type flags
540Fetch an
541.Vt mbuf
542with a
543.Vt mbuf cluster
544attached to it.
545If one of the allocations fails, the entire allocation fails.
546This routine is the preferred way of fetching both the
547.Vt mbuf
548and
549.Vt mbuf cluster
550together, as it avoids having to unlock/relock between allocations.
551Returns
552.Dv NULL
553on failure.
554.It Fn m_getclr how type
555Allocate an
556.Vt mbuf
557and zero out the data region.
558.It Fn m_free mbuf
559Frees
560.Vt mbuf .
561Returns
562.Va m_next
563of the freed
564.Vt mbuf .
565.El
566.Pp
567The functions below operate on
568.Vt mbuf chains .
569.Bl -ohang -offset indent
570.It Fn m_freem mbuf
571Free an entire
572.Vt mbuf chain ,
573including any external storage.
574.\"
575.It Fn m_adj mbuf len
576Trim
577.Fa len
578bytes from the head of an
579.Vt mbuf chain
580if
581.Fa len
582is positive, from the tail otherwise.
583.\"
584.It Fn m_prepend mbuf len how
585Allocate a new
586.Vt mbuf
587and prepend it to the
588.Vt mbuf chain ,
589handling
590.Dv M_PKTHDR
591properly.
592.Sy Note :
593It doesn't allocate any
594.Vt mbuf clusters ,
595so
596.Fa len
597must be less than
598.Dv MLEN
599or
600.Dv MHLEN ,
601depending on the
602.Dv M_PKTHDR
603flag setting.
604.\"
605.It Fn m_pullup mbuf len
606Arrange that the first
607.Fa len
608bytes of an
609.Vt mbuf chain
610are contiguous and lie in the data area of
611.Fa mbuf ,
612so they are accessible with
613.Fn mtod mbuf type .
614Return the new
615.Vt mbuf chain
616on success,
617.Dv NULL
618on failure
619(the
620.Vt mbuf chain
621is freed in this case).
622.Sy Note :
623It doesn't allocate any
624.Vt mbuf clusters ,
625so
626.Fa len
627must be less than
628.Dv MHLEN .
629.\"
630.It Fn m_copym mbuf offset len how
631Make a copy of an
632.Vt mbuf chain
633starting
634.Fa offset
635bytes from the beginning, continuing for
636.Fa len
637bytes.
638If
639.Fa len
640is
641.Dv M_COPYALL ,
642copy to the end of the
643.Vt mbuf chain .
644.Sy Note :
645The copy is read-only, because the
646.Vt mbuf clusters
647are not copied, only their reference counts are incremented.
648.\"
649.It Fn m_copypacket mbuf how
650Copy an entire packet including header, which must be present.
651This is an optimized version of the common case
652.Fn m_copym mbuf 0 M_COPYALL how .
653.Sy Note :
654The copy is read-only, because the
655.Vt mbuf clusters
656are not copied, only their reference counts are incremented.
657.\"
658.It Fn m_dup mbuf how
659Copy a packet header
660.Vt mbuf chain
661into a completely new
662.Vt mbuf chain ,
663including copying any
664.Vt mbuf clusters .
665Use this instead of
666.Fn m_copypacket
667when you need a writable copy of an
668.Vt mbuf chain .
669.\"
670.It Fn m_copydata mbuf offset len buf
671Copy data from an
672.Vt mbuf chain
673starting
674.Fa offset
675bytes from the beginning, continuing for
676.Fa len
677bytes, into the indicated buffer
678.Fa buf .
679.\"
680.It Fn m_copyback mbuf offset len buf
681Copy
682.Fa len
683bytes from the buffer
684.Fa buf
685back into the indicated
686.Vt mbuf chain ,
687starting at
688.Fa offset
689bytes from the beginning of the
690.Vt mbuf chain ,
691extending the
692.Vt mbuf chain
693if necessary.
694.Sy Note :
695It doesn't allocate any
696.Vt mbuf clusters ,
697just adds
698.Vt mbufs
699to the
700.Vt mbuf chain .
701It's safe to set
702.Fa offset
703beyond the current
704.Vt mbuf chain
705end: zeroed
706.Vt mbufs
707will be allocated to fill the space.
708.\"
709.It Fn m_length mbuf last
710Return the length of the
711.Vt mbuf chain ,
712and optionally a pointer to the last
713.Vt mbuf .
714.\"
715.It Fn m_dup_pkthdr to from how
716Upon the function's completion, the
717.Vt mbuf
718.Fa to
719will contain an identical copy of
720.Fa from->m_pkthdr
721and the per-packet attributes found in the
722.Vt mbuf chain
723.Fa from .
724The
725.Vt mbuf
726.Fa from
727must have the flag
728.Dv M_PKTHDR
729initially set, and
730.Fa to
731must be empty on entry.
732.\"
733.It Fn m_move_pkthdr to from
734Move
735.Va m_pkthdr
736and the per-packet attributes from the
737.Vt mbuf chain
738.Fa from
739to the
740.Vt mbuf
741.Fa to .
742The
743.Vt mbuf
744.Fa from
745must have the flag
746.Dv M_PKTHDR
747initially set, and
748.Fa to
749must be empty on entry.
750Upon the function's completion,
751.Fa from
752will have the flag
753.Dv M_PKTHDR
754and the per-packet attributes cleared.
755.\"
756.It Fn m_fixhdr mbuf
757Set the packet-header length to the length of the
758.Vt mbuf chain .
759.\"
760.It Fn m_devget buf len offset ifp copy
761Copy data from device local memory pointed to by
762.Fa buf
763to an
764.Vt mbuf chain .
765The copy is done using a specified copy routine
766.Fa copy ,
767or
768.Fn bcopy
769if
770.Fa copy
771is
772.Dv NULL .
773.\"
774.It Fn m_cat m n
775Concatenate
776.Fa n
777to
778.Fa m .
779Both
780.Vt mbuf chains
781must be of the same type.
782.Fa n
783is still valid after the function returned.
784.Sy Note :
785It does not handle
786.Dv M_PKTHDR
787and friends.
788.\"
789.It Fn m_split mbuf len how
790Partition an
791.Vt mbuf chain
792in two pieces, returning the tail:
793all but the first
794.Fa len
795bytes.
796In case of failure, it returns
797.Dv NULL
798and attempts to restore the
799.Vt mbuf chain
800to its original state.
801.\"
802.It Fn m_apply mbuf off len f arg
803Apply a function to an
804.Vt mbuf chain ,
805at offset
806.Fa off ,
807for length
808.Fa len
809bytes.
810Typically used to avoid calls to
811.Fn m_pullup
812which would otherwise be unnecessary or undesirable.
813.Fa arg
814is a convenience argument which is passed to the callback function
815.Fa f .
816.Pp
817Each time
818.Fn f
819is called, it will be passed
820.Fa arg ,
821a pointer to the
822.Fa data
823in the current mbuf, and the length
824.Fa len
825of the data in this mbuf to which the function should be applied.
826.Pp
827The function should return zero to indicate success;
828otherwise, if an error is indicated, then
829.Fn m_apply
830will return the error and stop iterating through the
831.Vt mbuf chain .
832.\"
833.It Fn m_getptr mbuf loc off
834Return a pointer to the mbuf containing the data located at
835.Fa loc
836bytes from the beginning of the
837.Vt mbuf chain .
838The corresponding offset into the mbuf will be stored in
839.Fa *off .
840.It Fn m_defrag m0 how
841Defragment an mbuf chain, returning the shortest possible
842chain of mbufs and clusters.
843If allocation fails and this cannot be completed,
844.Dv NULL
845will be returned and the original chain will be unchanged.
846Upon success, the original chain will be freed and the new
847chain will be returned.
848.Fa how
849should be either
850.Dv M_TRYWAIT
851or
852.Dv M_DONTWAIT ,
853depending on the caller's preference.
854.Pp
855This function is especially useful in network drivers, where
856certain long mbuf chains must be shortened before being added
857to TX descriptor lists.
858.El
859.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
860This section currently applies to TCP/IP only.
861In order to save the host CPU resources, computing checksums is
862offloaded to the network interface hardware if possible.
863The
864.Va m_pkthdr
865member of the leading
866.Vt mbuf
867of a packet contains two fields used for that purpose,
868.Vt int Va csum_flags
869and
870.Vt int Va csum_data .
871The meaning of those fields depends on the direction a packet flows in,
872and on whether the packet is fragmented.
873Henceforth,
874.Va csum_flags
875or
876.Va csum_data
877of a packet
878will denote the corresponding field of the
879.Va m_pkthdr
880member of the leading
881.Vt mbuf
882in the
883.Vt mbuf chain
884containing the packet.
885.Pp
886On output, checksum offloading is attempted after the outgoing
887interface has been determined for a packet.
888The interface-specific field
889.Va ifnet.if_data.ifi_hwassist
890(see
891.Xr ifnet 9 )
892is consulted for the capabilities of the interface to assist in
893computing checksums.
894The
895.Va csum_flags
896field of the packet header is set to indicate which actions the interface
897is supposed to perform on it.
898The actions unsupported by the network interface are done in
899software prior to passing the packet down to the interface driver;
900such actions will never be requested through
901.Va csum_flags .
902.Pp
903The flags demanding a particular action from an interface are as follows:
904.Bl -tag -width ".Dv CSUM_TCP" -offset indent
905.It Dv CSUM_IP
906The IP header checksum is to be computed and stored in the
907corresponding field of the packet.
908The hardware is expected to know the format of an IP header
909to determine the offset of the IP checksum field.
910.It Dv CSUM_TCP
911The TCP checksum is to be computed.
912(See below.)
913.It Dv CSUM_UDP
914The UDP checksum is to be computed.
915(See below.)
916.El
917.Pp
918Should a TCP or UDP checksum be offloaded to the hardware,
919the field
920.Va csum_data
921will contain the byte offset of the checksum field relative to the
922end of the IP header.
923In this case, the checksum field will be initially
924set by the TCP/IP module to the checksum of the pseudo header
925defined by the TCP and UDP specifications.
926.Pp
927For outbound packets which have been fragmented
928by the host CPU, the following will also be true,
929regardless of the checksum flag settings:
930.Bl -bullet -offset indent
931.It
932all fragments will have the flag
933.Dv M_FRAG
934set in their
935.Va m_flags
936field;
937.It
938the first and the last fragments in the chain will have
939.Dv M_FIRSTFRAG
940or
941.Dv M_LASTFRAG
942set in their
943.Va m_flags ,
944correspondingly;
945.It
946the first fragment in the chain will have the total number
947of fragments contained in its
948.Va csum_data
949field.
950.El
951.Pp
952The last rule for fragmented packets takes precedence over the one
953for a TCP or UDP checksum.
954Nevertheless, offloading a TCP or UDP checksum is possible for a
955fragmented packet if the flag
956.Dv CSUM_IP_FRAGS
957is set in the field
958.Va ifnet.if_data.ifi_hwassist
959associated with the network interface.
960However, in this case the interface is expected to figure out
961the location of the checksum field within the sequence of fragments
962by itself because
963.Va csum_data
964contains a fragment count instead of a checksum offset value.
965.Pp
966On input, an interface indicates the actions it has performed
967on a packet by setting one or more of the following flags in
968.Va csum_flags
969associated with the packet:
970.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
971.It Dv CSUM_IP_CHECKED
972The IP header checksum has been computed.
973.It Dv CSUM_IP_VALID
974The IP header has a valid checksum.
975This flag can appear only in combination with
976.Dv CSUM_IP_CHECKED .
977.It Dv CSUM_DATA_VALID
978The checksum of the data portion of the IP packet has been computed
979and stored in the field
980.Va csum_data
981in network byte order.
982.It Dv CSUM_PSEUDO_HDR
983Can be set only along with
984.Dv CSUM_DATA_VALID
985to indicate that the IP data checksum found in
986.Va csum_data
987allows for the pseudo header defined by the TCP and UDP specifications.
988Otherwise the checksum of the pseudo header must be calculated by
989the host CPU and added to
990.Va csum_data
991to obtain the final checksum to be used for TCP or UDP validation purposes.
992.El
993.Pp
994If a particular network interface just indicates success or
995failure of TCP or UDP checksum validation without returning
996the exact value of the checksum to the host CPU, its driver can mark
997.Dv CSUM_DATA_VALID
998and
999.Dv CSUM_PSEUDO_HDR
1000in
1001.Va csum_flags ,
1002and set
1003.Va csum_data
1004to
1005.Li 0xFFFF
1006hexadecimal to indicate a valid checksum.
1007It is a peculiarity of the algorithm used that the Internet checksum
1008calculated over any valid packet will be
1009.Li 0xFFFF
1010as long as the original checksum field is included.
1011.Pp
1012For inbound packets which are IP fragments, all
1013.Va csum_data
1014fields will be summed during reassembly to obtain the final checksum
1015value passed to an upper layer in the
1016.Va csum_data
1017field of the reassembled packet.
1018The
1019.Va csum_flags
1020fields of all fragments will be consolidated using logical AND
1021to obtain the final value for
1022.Va csum_flags .
1023Thus, in order to successfully
1024offload checksum computation for fragmented data,
1025all fragments should have the same value of
1026.Va csum_flags .
1027.Sh STRESS TESTING
1028When running a kernel compiled with the option
1029.Dv MBUF_STRESS_TEST ,
1030the following
1031.Xr sysctl 8 Ns
1032-controlled options may be used to create
1033various failure/extreme cases for testing of network drivers
1034and other parts of the kernel that rely on
1035.Vt mbufs .
1036.Bl -tag -width indent
1037.It Va net.inet.ip.mbuf_frag_size
1038Causes
1039.Fn ip_output
1040to fragment outgoing
1041.Vt mbuf chains
1042into fragments of the specified size.
1043Setting this variable to 1 is an excellent way to
1044test the long
1045.Vt mbuf chain
1046handling ability of network drivers.
1047.It Va kern.ipc.m_defragrandomfailures
1048Causes the function
1049.Fn m_defrag
1050to randomly fail, returning
1051.Dv NULL .
1052Any piece of code which uses
1053.Fn m_defrag
1054should be tested with this feature.
1055.El
1056.Sh RETURN VALUES
1057See above.
1058.Sh SEE ALSO
1059.Xr ifnet 9 ,
1060.Xr mbuf_tags 9
1061.Sh HISTORY
1062.\" Please correct me if I'm wrong
1063.Vt Mbufs
1064appeared in an early version of
1065.Bx .
1066Besides being used for network packets, they were used
1067to store various dynamic structures, such as routing table
1068entries, interface addresses, protocol control blocks, etc.
1069.Sh AUTHORS
1070The original
1071.Nm
1072man page was written by Yar Tikhiy.
1073