xref: /freebsd/share/man/man9/mbuf.9 (revision 6b3455a7665208c366849f0b2b3bc916fb97516e)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd May 20, 2004
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_pullup "struct mbuf *mbuf" "int len"
97.Ft struct mbuf *
98.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
99.Ft struct mbuf *
100.Fn m_copypacket "struct mbuf *mbuf" "int how"
101.Ft struct mbuf *
102.Fn m_dup "struct mbuf *mbuf" "int how"
103.Ft void
104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
105.Ft void
106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft struct mbuf *
108.Fo m_devget
109.Fa "char *buf"
110.Fa "int len"
111.Fa "int offset"
112.Fa "struct ifnet *ifp"
113.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
114.Fc
115.Ft void
116.Fn m_cat "struct mbuf *m" "struct mbuf *n"
117.Ft u_int
118.Fn m_fixhdr "struct mbuf *mbuf"
119.Ft int
120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
121.Ft void
122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
123.Ft u_int
124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
125.Ft struct mbuf *
126.Fn m_split "struct mbuf *mbuf" "int len" "int how"
127.Ft int
128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
129.Ft struct mbuf *
130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
131.Ft struct mbuf *
132.Fn m_defrag "struct mbuf *m0" "int how"
133.\"
134.Sh DESCRIPTION
135An
136.Vt mbuf
137is a basic unit of memory management in the kernel IPC subsystem.
138Network packets and socket buffers are stored in
139.Vt mbufs .
140A network packet may span multiple
141.Vt mbufs
142arranged into a
143.Vt mbuf chain
144(linked list),
145which allows adding or trimming
146network headers with little overhead.
147.Pp
148While a developer should not bother with
149.Vt mbuf
150internals without serious
151reason in order to avoid incompatibilities with future changes, it
152is useful to understand the general structure of an
153.Vt mbuf .
154.Pp
155An
156.Vt mbuf
157consists of a variable-sized header and a small internal
158buffer for data.
159The total size of an
160.Vt mbuf ,
161.Dv MSIZE ,
162is a constant defined in
163.In sys/param.h .
164The
165.Vt mbuf
166header includes:
167.Pp
168.Bl -tag -width "m_nextpkt" -offset indent
169.It Va m_next
170.Pq Vt struct mbuf *
171A pointer to the next
172.Vt mbuf
173in the
174.Vt mbuf chain .
175.It Va m_nextpkt
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf chain
179in the queue.
180.It Va m_data
181.Pq Vt caddr_t
182A pointer to data attached to this
183.Vt mbuf .
184.It Va m_len
185.Pq Vt int
186The length of the data.
187.It Va m_type
188.Pq Vt short
189The type of the data.
190.It Va m_flags
191.Pq Vt int
192The
193.Vt mbuf
194flags.
195.El
196.Pp
197The
198.Vt mbuf
199flag bits are defined as follows:
200.Bd -literal
201/* mbuf flags */
202#define	M_EXT		0x0001	/* has associated external storage */
203#define	M_PKTHDR	0x0002	/* start of record */
204#define	M_EOR		0x0004	/* end of record */
205#define	M_RDONLY	0x0008	/* associated data marked read-only */
206#define	M_PROTO1	0x0010	/* protocol-specific */
207#define	M_PROTO2	0x0020 	/* protocol-specific */
208#define	M_PROTO3	0x0040	/* protocol-specific */
209#define	M_PROTO4	0x0080	/* protocol-specific */
210#define	M_PROTO5	0x0100	/* protocol-specific */
211#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
212#define	M_FREELIST	0x8000	/* mbuf is on the free list */
213
214/* mbuf pkthdr flags (also stored in m_flags) */
215#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
216#define	M_MCAST		0x0400	/* send/received as link-level multicast */
217#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
218#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
219#define	M_LASTFRAG	0x2000	/* packet is last fragment */
220.Ed
221.Pp
222The available
223.Vt mbuf
224types are defined as follows:
225.Bd -literal
226/* mbuf types */
227#define	MT_DATA		1	/* dynamic (data) allocation */
228#define	MT_HEADER	2	/* packet header */
229#define	MT_SONAME	8	/* socket name */
230#define	MT_FTABLE	11	/* fragment reassembly header */
231#define	MT_CONTROL	14	/* extra-data protocol message */
232#define	MT_OOBDATA	15	/* expedited data */
233.Ed
234.Pp
235If the
236.Dv M_PKTHDR
237flag is set, a
238.Vt struct pkthdr Va m_pkthdr
239is added to the
240.Vt mbuf
241header.
242It contains a pointer to the interface
243the packet has been received from
244.Pq Vt struct ifnet Va *rcvif ,
245and the total packet length
246.Pq Vt int Va len .
247Optionally, it may also contain an attached list of packet tags
248.Pq Vt "struct m_tag" .
249See
250.Xr mbuf_tags 9
251for details.
252Fields used in offloading checksum calculation to the hardware are kept in
253.Va m_pkthdr
254as well.
255See
256.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
257for details.
258.Pp
259If small enough, data is stored in the internal data buffer of an
260.Vt mbuf .
261If the data is sufficiently large, another
262.Vt mbuf
263may be added to the
264.Vt mbuf chain ,
265or external storage may be associated with the
266.Vt mbuf .
267.Dv MHLEN
268bytes of data can fit into an
269.Vt mbuf
270with the
271.Dv M_PKTHDR
272flag set,
273.Dv MLEN
274bytes can otherwise.
275.Pp
276If external storage is being associated with an
277.Vt mbuf ,
278the
279.Va m_ext
280header is added at the cost of losing the internal data buffer.
281It includes a pointer to external storage, the size of the storage,
282a pointer to a function used for freeing the storage,
283a pointer to an optional argument that can be passed to the function,
284and a pointer to a reference counter.
285An
286.Vt mbuf
287using external storage has the
288.Dv M_EXT
289flag set.
290.Pp
291The system supplies a macro for allocating the desired external storage
292buffer,
293.Dv MEXTADD .
294.Pp
295The allocation and management of the reference counter is handled by the
296subsystem.
297The developer can check whether the reference count for the
298external storage of a given
299.Vt mbuf
300is greater than 1 with the
301.Dv MEXT_IS_REF
302macro.
303Similarly, the developer can directly add and remove references,
304if absolutely necessary, with the use of the
305.Dv MEXT_ADD_REF
306and
307.Dv MEXT_REM_REF
308macros.
309.Pp
310The system also supplies a default type of external storage buffer called an
311.Vt mbuf cluster .
312.Vt Mbuf clusters
313can be allocated and configured with the use of the
314.Dv MCLGET
315macro.
316Each
317.Vt mbuf cluster
318is
319.Dv MCLBYTES
320in size, where MCLBYTES is a machine-dependent constant.
321The system defines an advisory macro
322.Dv MINCLSIZE ,
323which is the smallest amount of data to put into an
324.Vt mbuf cluster .
325It's equal to the sum of
326.Dv MLEN
327and
328.Dv MHLEN .
329It is typically preferable to store data into the data region of an
330.Vt mbuf ,
331if size permits, as opposed to allocating a separate
332.Vt mbuf cluster
333to hold the same data.
334.\"
335.Ss Macros and Functions
336There are numerous predefined macros and functions that provide the
337developer with common utilities.
338.\"
339.Bl -ohang -offset indent
340.It Fn mtod mbuf type
341Convert an
342.Fa mbuf
343pointer to a data pointer.
344The macro expands to the data pointer cast to the pointer of the specified
345.Fa type .
346.Sy Note :
347It is advisable to ensure that there is enough contiguous data in
348.Fa mbuf .
349See
350.Fn m_pullup
351for details.
352.It Fn MGET mbuf how type
353Allocate an
354.Vt mbuf
355and initialize it to contain internal data.
356.Fa mbuf
357will point to the allocated
358.Vt mbuf
359on success, or be set to
360.Dv NULL
361on failure.
362The
363.Fa how
364argument is to be set to
365.Dv M_TRYWAIT
366or
367.Dv M_DONTWAIT .
368It specifies whether the caller is willing to block if necessary.
369If
370.Fa how
371is set to
372.Dv M_TRYWAIT ,
373a failed allocation will result in the caller being put
374to sleep for a designated
375kern.ipc.mbuf_wait
376.Xr ( sysctl 8
377tunable)
378number of ticks.
379A number of other functions and macros related to
380.Vt mbufs
381have the same argument because they may
382at some point need to allocate new
383.Vt mbufs .
384.Pp
385Programmers should be careful not to confuse the
386.Vt mbuf
387allocation flag
388.Dv M_DONTWAIT
389with the
390.Xr malloc 9
391allocation flag,
392.Dv M_NOWAIT .
393They are not the same.
394.It Fn MGETHDR mbuf how type
395Allocate an
396.Vt mbuf
397and initialize it to contain a packet header
398and internal data.
399See
400.Fn MGET
401for details.
402.It Fn MCLGET mbuf how
403Allocate and attach an
404.Vt mbuf cluster
405to
406.Fa mbuf .
407If the macro fails, the
408.Dv M_EXT
409flag won't be set in
410.Fa mbuf .
411.It Fn M_ALIGN mbuf len
412Set the pointer
413.Fa mbuf->m_data
414to place an object of the size
415.Fa len
416at the end of the internal data area of
417.Fa mbuf ,
418long word aligned.
419Applicable only if
420.Fa mbuf
421is newly allocated with
422.Fn MGET
423or
424.Fn m_get .
425.It Fn MH_ALIGN mbuf len
426Serves the same purpose as
427.Fn M_ALIGN
428does, but only for
429.Fa mbuf
430newly allocated with
431.Fn MGETHDR
432or
433.Fn m_gethdr ,
434or initialized by
435.Fn m_dup_pkthdr
436or
437.Fn m_move_pkthdr .
438.It Fn M_LEADINGSPACE mbuf
439Returns the number of bytes available before the beginning
440of data in
441.Fa mbuf .
442.It Fn M_TRAILINGSPACE mbuf
443Returns the number of bytes available after the end of data in
444.Fa mbuf .
445.It Fn M_PREPEND mbuf len how
446This macro operates on an
447.Vt mbuf chain .
448It is an optimized wrapper for
449.Fn m_prepend
450that can make use of possible empty space before data
451(e.g.\& left after trimming of a link-layer header).
452The new
453.Vt mbuf chain
454pointer or
455.Dv NULL
456is in
457.Fa mbuf
458after the call.
459.It Fn M_MOVE_PKTHDR to from
460Using this macro is equivalent to calling
461.Fn m_move_pkthdr to from .
462.It Fn M_WRITABLE mbuf
463This macro will evaluate true if
464.Fa mbuf
465is not marked
466.Dv M_RDONLY
467and if either
468.Fa mbuf
469does not contain external storage or,
470if it does,
471then if the reference count of the storage is not greater than 1.
472The
473.Dv M_RDONLY
474flag can be set in
475.Fa mbuf->m_flags .
476This can be achieved during setup of the external storage,
477by passing the
478.Dv M_RDONLY
479bit as a
480.Fa flags
481argument to the
482.Fn MEXTADD
483macro, or can be directly set in individual
484.Vt mbufs .
485.It Fn MCHTYPE mbuf type
486Change the type of
487.Fa mbuf
488to
489.Fa type .
490This is a relatively expensive operation and should be avoided.
491.El
492.Pp
493The functions are:
494.Bl -ohang -offset indent
495.It Fn m_get how type
496A function version of
497.Fn MGET
498for non-critical paths.
499.It Fn m_getm orig len how type
500Allocate
501.Fa len
502bytes worth of
503.Vt mbufs
504and
505.Vt mbuf clusters
506if necessary and append the resulting allocated
507.Vt mbuf chain
508to the
509.Vt mbuf chain
510.Fa orig ,
511if it is
512.No non- Ns Dv NULL .
513If the allocation fails at any point,
514free whatever was allocated and return
515.Dv NULL .
516If
517.Fa orig
518is
519.No non- Ns Dv NULL ,
520it will not be freed.
521It is possible to use
522.Fn m_getm
523to either append
524.Fa len
525bytes to an existing
526.Vt mbuf
527or
528.Vt mbuf chain
529(for example, one which may be sitting in a pre-allocated ring)
530or to simply perform an all-or-nothing
531.Vt mbuf
532and
533.Vt mbuf cluster
534allocation.
535.It Fn m_gethdr how type
536A function version of
537.Fn MGETHDR
538for non-critical paths.
539.It Fn m_getcl how type flags
540Fetch an
541.Vt mbuf
542with a
543.Vt mbuf cluster
544attached to it.
545If one of the allocations fails, the entire allocation fails.
546This routine is the preferred way of fetching both the
547.Vt mbuf
548and
549.Vt mbuf cluster
550together, as it avoids having to unlock/relock between allocations.
551Returns
552.Dv NULL
553on failure.
554.It Fn m_getclr how type
555Allocate an
556.Vt mbuf
557and zero out the data region.
558.It Fn m_free mbuf
559Frees
560.Vt mbuf .
561.El
562.Pp
563The functions below operate on
564.Vt mbuf chains .
565.Bl -ohang -offset indent
566.It Fn m_freem mbuf
567Free an entire
568.Vt mbuf chain ,
569including any external storage.
570.\"
571.It Fn m_adj mbuf len
572Trim
573.Fa len
574bytes from the head of an
575.Vt mbuf chain
576if
577.Fa len
578is positive, from the tail otherwise.
579.\"
580.It Fn m_prepend mbuf len how
581Allocate a new
582.Vt mbuf
583and prepend it to the
584.Vt mbuf chain ,
585handling
586.Dv M_PKTHDR
587properly.
588.Sy Note :
589It doesn't allocate any
590.Vt mbuf clusters ,
591so
592.Fa len
593must be less than
594.Dv MLEN
595or
596.Dv MHLEN ,
597depending on the
598.Dv M_PKTHDR
599flag setting.
600.\"
601.It Fn m_pullup mbuf len
602Arrange that the first
603.Fa len
604bytes of an
605.Vt mbuf chain
606are contiguous and lie in the data area of
607.Fa mbuf ,
608so they are accessible with
609.Fn mtod mbuf type .
610Return the new
611.Vt mbuf chain
612on success,
613.Dv NULL
614on failure
615(the
616.Vt mbuf chain
617is freed in this case).
618.Sy Note :
619It doesn't allocate any
620.Vt mbuf clusters ,
621so
622.Fa len
623must be less than
624.Dv MHLEN .
625.\"
626.It Fn m_copym mbuf offset len how
627Make a copy of an
628.Vt mbuf chain
629starting
630.Fa offset
631bytes from the beginning, continuing for
632.Fa len
633bytes.
634If
635.Fa len
636is
637.Dv M_COPYALL ,
638copy to the end of the
639.Vt mbuf chain .
640.Sy Note :
641The copy is read-only, because the
642.Vt mbuf clusters
643are not copied, only their reference counts are incremented.
644.\"
645.It Fn m_copypacket mbuf how
646Copy an entire packet including header, which must be present.
647This is an optimized version of the common case
648.Fn m_copym mbuf 0 M_COPYALL how .
649.Sy Note :
650The copy is read-only, because the
651.Vt mbuf clusters
652are not copied, only their reference counts are incremented.
653.\"
654.It Fn m_dup mbuf how
655Copy a packet header
656.Vt mbuf chain
657into a completely new
658.Vt mbuf chain ,
659including copying any
660.Vt mbuf clusters .
661Use this instead of
662.Fn m_copypacket
663when you need a writable copy of an
664.Vt mbuf chain .
665.\"
666.It Fn m_copydata mbuf offset len buf
667Copy data from an
668.Vt mbuf chain
669starting
670.Fa offset
671bytes from the beginning, continuing for
672.Fa len
673bytes, into the indicated buffer
674.Fa buf .
675.\"
676.It Fn m_copyback mbuf offset len buf
677Copy
678.Fa len
679bytes from the buffer
680.Fa buf
681back into the indicated
682.Vt mbuf chain ,
683starting at
684.Fa offset
685bytes from the beginning of the
686.Vt mbuf chain ,
687extending the
688.Vt mbuf chain
689if necessary.
690.Sy Note :
691It doesn't allocate any
692.Vt mbuf clusters ,
693just adds
694.Vt mbufs
695to the
696.Vt mbuf chain .
697It's safe to set
698.Fa offset
699beyond the current
700.Vt mbuf chain
701end: zeroed
702.Vt mbufs
703will be allocated to fill the space.
704.\"
705.It Fn m_length mbuf last
706Return the length of the
707.Vt mbuf chain ,
708and optionally a pointer to the last
709.Vt mbuf .
710.\"
711.It Fn m_dup_pkthdr to from how
712Upon the function's completion, the
713.Vt mbuf
714.Fa to
715will contain an identical copy of
716.Fa from->m_pkthdr
717and the per-packet attributes found in the
718.Vt mbuf chain
719.Fa from .
720The
721.Vt mbuf
722.Fa from
723must have the flag
724.Dv M_PKTHDR
725initially set, and
726.Fa to
727must be empty on entry.
728.\"
729.It Fn m_move_pkthdr to from
730Move
731.Va m_pkthdr
732and the per-packet attributes from the
733.Vt mbuf chain
734.Fa from
735to the
736.Vt mbuf
737.Fa to .
738The
739.Vt mbuf
740.Fa from
741must have the flag
742.Dv M_PKTHDR
743initially set, and
744.Fa to
745must be empty on entry.
746Upon the function's completion,
747.Fa from
748will have the flag
749.Dv M_PKTHDR
750and the per-packet attributes cleared.
751.\"
752.It Fn m_fixhdr mbuf
753Set the packet-header length to the length of the
754.Vt mbuf chain .
755.\"
756.It Fn m_devget buf len offset ifp copy
757Copy data from device local memory pointed to by
758.Fa buf
759to an
760.Vt mbuf chain .
761The copy is done using a specified copy routine
762.Fa copy ,
763or
764.Fn bcopy
765if
766.Fa copy
767is
768.Dv NULL .
769.\"
770.It Fn m_cat m n
771Concatenate
772.Fa n
773to
774.Fa m .
775Both
776.Vt mbuf chains
777must be of the same type.
778.Fa n
779is still valid after the function returns.
780.Sy Note :
781It does not handle
782.Dv M_PKTHDR
783and friends.
784.\"
785.It Fn m_split mbuf len how
786Partition an
787.Vt mbuf chain
788in two pieces, returning the tail:
789all but the first
790.Fa len
791bytes.
792In case of failure, it returns
793.Dv NULL
794and attempts to restore the
795.Vt mbuf chain
796to its original state.
797.\"
798.It Fn m_apply mbuf off len f arg
799Apply a function to an
800.Vt mbuf chain ,
801at offset
802.Fa off ,
803for length
804.Fa len
805bytes.
806Typically used to avoid calls to
807.Fn m_pullup
808which would otherwise be unnecessary or undesirable.
809.Fa arg
810is a convenience argument which is passed to the callback function
811.Fa f .
812.Pp
813Each time
814.Fn f
815is called, it will be passed
816.Fa arg ,
817a pointer to the
818.Fa data
819in the current mbuf, and the length
820.Fa len
821of the data in this mbuf to which the function should be applied.
822.Pp
823The function should return zero to indicate success;
824otherwise, if an error is indicated, then
825.Fn m_apply
826will return the error and stop iterating through the
827.Vt mbuf chain .
828.\"
829.It Fn m_getptr mbuf loc off
830Return a pointer to the mbuf containing the data located at
831.Fa loc
832bytes from the beginning of the
833.Vt mbuf chain .
834The corresponding offset into the mbuf will be stored in
835.Fa *off .
836.It Fn m_defrag m0 how
837Defragment an mbuf chain, returning the shortest possible
838chain of mbufs and clusters.
839If allocation fails and this cannot be completed,
840.Dv NULL
841will be returned and the original chain will be unchanged.
842Upon success, the original chain will be freed and the new
843chain will be returned.
844.Fa how
845should be either
846.Dv M_TRYWAIT
847or
848.Dv M_DONTWAIT ,
849depending on the caller's preference.
850.Pp
851This function is especially useful in network drivers, where
852certain long mbuf chains must be shortened before being added
853to TX descriptor lists.
854.El
855.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
856This section currently applies to TCP/IP only.
857In order to save the host CPU resources, computing checksums is
858offloaded to the network interface hardware if possible.
859The
860.Va m_pkthdr
861member of the leading
862.Vt mbuf
863of a packet contains two fields used for that purpose,
864.Vt int Va csum_flags
865and
866.Vt int Va csum_data .
867The meaning of those fields depends on the direction a packet flows in,
868and on whether the packet is fragmented.
869Henceforth,
870.Va csum_flags
871or
872.Va csum_data
873of a packet
874will denote the corresponding field of the
875.Va m_pkthdr
876member of the leading
877.Vt mbuf
878in the
879.Vt mbuf chain
880containing the packet.
881.Pp
882On output, checksum offloading is attempted after the outgoing
883interface has been determined for a packet.
884The interface-specific field
885.Va ifnet.if_data.ifi_hwassist
886(see
887.Xr ifnet 9 )
888is consulted for the capabilities of the interface to assist in
889computing checksums.
890The
891.Va csum_flags
892field of the packet header is set to indicate which actions the interface
893is supposed to perform on it.
894The actions unsupported by the network interface are done in
895software prior to passing the packet down to the interface driver;
896such actions will never be requested through
897.Va csum_flags .
898.Pp
899The flags demanding a particular action from an interface are as follows:
900.Bl -tag -width ".Dv CSUM_TCP" -offset indent
901.It Dv CSUM_IP
902The IP header checksum is to be computed and stored in the
903corresponding field of the packet.
904The hardware is expected to know the format of an IP header
905to determine the offset of the IP checksum field.
906.It Dv CSUM_TCP
907The TCP checksum is to be computed.
908(See below.)
909.It Dv CSUM_UDP
910The UDP checksum is to be computed.
911(See below.)
912.El
913.Pp
914Should a TCP or UDP checksum be offloaded to the hardware,
915the field
916.Va csum_data
917will contain the byte offset of the checksum field relative to the
918end of the IP header.
919In this case, the checksum field will be initially
920set by the TCP/IP module to the checksum of the pseudo header
921defined by the TCP and UDP specifications.
922.Pp
923For outbound packets which have been fragmented
924by the host CPU, the following will also be true,
925regardless of the checksum flag settings:
926.Bl -bullet -offset indent
927.It
928all fragments will have the flag
929.Dv M_FRAG
930set in their
931.Va m_flags
932field;
933.It
934the first and the last fragments in the chain will have
935.Dv M_FIRSTFRAG
936or
937.Dv M_LASTFRAG
938set in their
939.Va m_flags ,
940correspondingly;
941.It
942the first fragment in the chain will have the total number
943of fragments contained in its
944.Va csum_data
945field.
946.El
947.Pp
948The last rule for fragmented packets takes precedence over the one
949for a TCP or UDP checksum.
950Nevertheless, offloading a TCP or UDP checksum is possible for a
951fragmented packet if the flag
952.Dv CSUM_IP_FRAGS
953is set in the field
954.Va ifnet.if_data.ifi_hwassist
955associated with the network interface.
956However, in this case the interface is expected to figure out
957the location of the checksum field within the sequence of fragments
958by itself because
959.Va csum_data
960contains a fragment count instead of a checksum offset value.
961.Pp
962On input, an interface indicates the actions it has performed
963on a packet by setting one or more of the following flags in
964.Va csum_flags
965associated with the packet:
966.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
967.It Dv CSUM_IP_CHECKED
968The IP header checksum has been computed.
969.It Dv CSUM_IP_VALID
970The IP header has a valid checksum.
971This flag can appear only in combination with
972.Dv CSUM_IP_CHECKED .
973.It Dv CSUM_DATA_VALID
974The checksum of the data portion of the IP packet has been computed
975and stored in the field
976.Va csum_data
977in network byte order.
978.It Dv CSUM_PSEUDO_HDR
979Can be set only along with
980.Dv CSUM_DATA_VALID
981to indicate that the IP data checksum found in
982.Va csum_data
983allows for the pseudo header defined by the TCP and UDP specifications.
984Otherwise the checksum of the pseudo header must be calculated by
985the host CPU and added to
986.Va csum_data
987to obtain the final checksum to be used for TCP or UDP validation purposes.
988.El
989.Pp
990If a particular network interface just indicates success or
991failure of TCP or UDP checksum validation without returning
992the exact value of the checksum to the host CPU, its driver can mark
993.Dv CSUM_DATA_VALID
994and
995.Dv CSUM_PSEUDO_HDR
996in
997.Va csum_flags ,
998and set
999.Va csum_data
1000to
1001.Li 0xFFFF
1002hexadecimal to indicate a valid checksum.
1003It is a peculiarity of the algorithm used that the Internet checksum
1004calculated over any valid packet will be
1005.Li 0xFFFF
1006as long as the original checksum field is included.
1007.Pp
1008For inbound packets which are IP fragments, all
1009.Va csum_data
1010fields will be summed during reassembly to obtain the final checksum
1011value passed to an upper layer in the
1012.Va csum_data
1013field of the reassembled packet.
1014The
1015.Va csum_flags
1016fields of all fragments will be consolidated using logical AND
1017to obtain the final value for
1018.Va csum_flags .
1019Thus, in order to successfully
1020offload checksum computation for fragmented data,
1021all fragments should have the same value of
1022.Va csum_flags .
1023.Sh STRESS TESTING
1024When running a kernel compiled with the option
1025.Dv MBUF_STRESS_TEST ,
1026the following
1027.Xr sysctl 8 Ns
1028-controlled options may be used to create
1029various failure/extreme cases for testing of network drivers
1030and other parts of the kernel that rely on
1031.Vt mbufs .
1032.Bl -tag -width indent
1033.It Va net.inet.ip.mbuf_frag_size
1034Causes
1035.Fn ip_output
1036to fragment outgoing
1037.Vt mbuf chains
1038into fragments of the specified size.
1039Setting this variable to 1 is an excellent way to
1040test the long
1041.Vt mbuf chain
1042handling ability of network drivers.
1043.It Va kern.ipc.m_defragrandomfailures
1044Causes the function
1045.Fn m_defrag
1046to randomly fail, returning
1047.Dv NULL .
1048Any piece of code which uses
1049.Fn m_defrag
1050should be tested with this feature.
1051.El
1052.Sh RETURN VALUES
1053See above.
1054.Sh SEE ALSO
1055.Xr ifnet 9 ,
1056.Xr mbuf_tags 9
1057.Sh HISTORY
1058.\" Please correct me if I'm wrong
1059.Vt Mbufs
1060appeared in an early version of
1061.Bx .
1062Besides being used for network packets, they were used
1063to store various dynamic structures, such as routing table
1064entries, interface addresses, protocol control blocks, etc.
1065.Sh AUTHORS
1066The original
1067.Nm
1068man page was written by Yar Tikhiy.
1069