xref: /freebsd/share/man/man9/mbuf.9 (revision 2357939bc239bd5334a169b62313806178dd8f30)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd October 17, 2000
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_pullup "struct mbuf *mbuf" "int len"
97.Ft struct mbuf *
98.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
99.Ft struct mbuf *
100.Fn m_copypacket "struct mbuf *mbuf" "int how"
101.Ft struct mbuf *
102.Fn m_dup "struct mbuf *mbuf" "int how"
103.Ft void
104.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
105.Ft void
106.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft struct mbuf *
108.Fo m_devget
109.Fa "char *buf"
110.Fa "int len"
111.Fa "int offset"
112.Fa "struct ifnet *ifp"
113.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
114.Fc
115.Ft void
116.Fn m_cat "struct mbuf *m" "struct mbuf *n"
117.Ft u_int
118.Fn m_fixhdr "struct mbuf *mbuf"
119.Ft int
120.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
121.Ft void
122.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
123.Ft u_int
124.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
125.Ft struct mbuf *
126.Fn m_split "struct mbuf *mbuf" "int len" "int how"
127.Ft int
128.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
129.Ft struct mbuf *
130.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
131.Ft struct mbuf *
132.Fn m_defrag "struct mbuf *m0" "int how"
133.\"
134.Sh DESCRIPTION
135An
136.Vt mbuf
137is a basic unit of memory management in the kernel IPC subsystem.
138Network packets and socket buffers are stored in
139.Vt mbufs .
140A network packet may span multiple
141.Vt mbufs
142arranged into a
143.Vt mbuf chain
144(linked list),
145which allows adding or trimming
146network headers with little overhead.
147.Pp
148While a developer should not bother with
149.Vt mbuf
150internals without serious
151reason in order to avoid incompatibilities with future changes, it
152is useful to understand the general structure of an
153.Vt mbuf .
154.Pp
155An
156.Vt mbuf
157consists of a variable-sized header and a small internal
158buffer for data.
159The total size of an
160.Vt mbuf ,
161.Dv MSIZE ,
162is a constant defined in
163.In sys/param.h .
164The
165.Vt mbuf
166header includes:
167.Pp
168.Bl -tag -width "m_nextpkt" -offset indent
169.It Va m_next
170.Pq Vt struct mbuf *
171A pointer to the next
172.Vt mbuf
173in the
174.Vt mbuf chain .
175.It Va m_nextpkt
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf chain
179in the queue.
180.It Va m_data
181.Pq Vt caddr_t
182A pointer to data attached to this
183.Vt mbuf .
184.It Va m_len
185.Pq Vt int
186The length of the data.
187.It Va m_type
188.Pq Vt short
189The type of the data.
190.It Va m_flags
191.Pq Vt int
192The
193.Vt mbuf
194flags.
195.El
196.Pp
197The
198.Vt mbuf
199flag bits are defined as follows:
200.Bd -literal
201/* mbuf flags */
202#define	M_EXT		0x0001	/* has associated external storage */
203#define	M_PKTHDR	0x0002	/* start of record */
204#define	M_EOR		0x0004	/* end of record */
205#define	M_RDONLY	0x0008	/* associated data marked read-only */
206#define	M_PROTO1	0x0010	/* protocol-specific */
207#define	M_PROTO2	0x0020 	/* protocol-specific */
208#define	M_PROTO3	0x0040	/* protocol-specific */
209#define	M_PROTO4	0x0080	/* protocol-specific */
210#define	M_PROTO5	0x0100	/* protocol-specific */
211#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
212#define	M_FREELIST	0x8000	/* mbuf is on the free list */
213
214/* mbuf pkthdr flags (also stored in m_flags) */
215#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
216#define	M_MCAST		0x0400	/* send/received as link-level multicast */
217#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
218#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
219#define	M_LASTFRAG	0x2000	/* packet is last fragment */
220.Ed
221.Pp
222The available
223.Vt mbuf
224types are defined as follows:
225.Bd -literal
226/* mbuf types */
227#define	MT_DATA		1	/* dynamic (data) allocation */
228#define	MT_HEADER	2	/* packet header */
229#define	MT_SONAME	8	/* socket name */
230#define	MT_FTABLE	11	/* fragment reassembly header */
231#define	MT_TAG		13	/* volatile metadata associated to pkts */
232#define	MT_CONTROL	14	/* extra-data protocol message */
233#define	MT_OOBDATA	15	/* expedited data */
234.Ed
235.Pp
236If the
237.Dv M_PKTHDR
238flag is set, a
239.Vt struct pkthdr Va m_pkthdr
240is added to the
241.Vt mbuf
242header.
243It contains a pointer to the interface
244the packet has been received from
245.Pq Vt struct ifnet Va *rcvif ,
246and the total packet length
247.Pq Vt int Va len .
248Fields used in offloading checksum calculation to the hardware are kept in
249.Va m_pkthdr
250as well.
251See
252.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
253for details.
254.Pp
255If small enough, data is stored in the internal data buffer of an
256.Vt mbuf .
257If the data is sufficiently large, another
258.Vt mbuf
259may be added to the
260.Vt mbuf chain ,
261or external storage may be associated with the
262.Vt mbuf .
263.Dv MHLEN
264bytes of data can fit into an
265.Vt mbuf
266with the
267.Dv M_PKTHDR
268flag set,
269.Dv MLEN
270bytes can otherwise.
271.Pp
272If external storage is being associated with an
273.Vt mbuf ,
274the
275.Va m_ext
276header is added at the cost of losing the internal data buffer.
277It includes a pointer to external storage, the size of the storage,
278a pointer to a function used for freeing the storage,
279a pointer to an optional argument that can be passed to the function,
280and a pointer to a reference counter.
281An
282.Vt mbuf
283using external storage has the
284.Dv M_EXT
285flag set.
286.Pp
287The system supplies a macro for allocating the desired external storage
288buffer,
289.Dv MEXTADD .
290.Pp
291The allocation and management of the reference counter is handled by the
292subsystem.
293The developer can check whether the reference count for the
294external storage of a given
295.Vt mbuf
296is greater than 1 with the
297.Dv MEXT_IS_REF
298macro.
299Similarly, the developer can directly add and remove references,
300if absolutely necessary, with the use of the
301.Dv MEXT_ADD_REF
302and
303.Dv MEXT_REM_REF
304macros.
305.Pp
306The system also supplies a default type of external storage buffer called an
307.Vt mbuf cluster .
308.Vt Mbuf clusters
309can be allocated and configured with the use of the
310.Dv MCLGET
311macro.
312Each
313.Vt mbuf cluster
314is
315.Dv MCLBYTES
316in size, where MCLBYTES is a machine-dependent constant.
317The system defines an advisory macro
318.Dv MINCLSIZE ,
319which is the smallest amount of data to put into an
320.Vt mbuf cluster .
321It's equal to the sum of
322.Dv MLEN
323and
324.Dv MHLEN .
325It is typically preferable to store data into the data region of an
326.Vt mbuf ,
327if size permits, as opposed to allocating a separate
328.Vt mbuf cluster
329to hold the same data.
330.\"
331.Ss Macros and Functions
332There are numerous predefined macros and functions that provide the
333developer with common utilities.
334.\"
335.Bl -ohang -offset indent
336.It Fn mtod mbuf type
337Convert an
338.Fa mbuf
339pointer to a data pointer.
340The macro expands to the data pointer cast to the pointer of the specified
341.Fa type .
342.Sy Note :
343It is advisable to ensure that there is enough contiguous data in
344.Fa mbuf .
345See
346.Fn m_pullup
347for details.
348.It Fn MGET mbuf how type
349Allocate an
350.Vt mbuf
351and initialize it to contain internal data.
352.Fa mbuf
353will point to the allocated
354.Vt mbuf
355on success, or be set to
356.Dv NULL
357on failure.
358The
359.Fa how
360argument is to be set to
361.Dv M_TRYWAIT
362or
363.Dv M_DONTWAIT .
364It specifies whether the caller is willing to block if necessary.
365If
366.Fa how
367is set to
368.Dv M_TRYWAIT ,
369a failed allocation will result in the caller being put
370to sleep for a designated
371.Va kern.ipc.mbuf_wait
372.Xr ( sysctl 8
373tunable)
374number of ticks.
375A number of other functions and macros related to
376.Vt mbufs
377have the same argument because they may
378at some point need to allocate new
379.Vt mbufs .
380.Pp
381Programmers should be careful not to confuse the
382.Vt mbuf
383allocation flag
384.Dv M_DONTWAIT
385with the
386.Xr malloc 9
387allocation flag,
388.Dv M_NOWAIT .
389They are not the same.
390.It Fn MGETHDR mbuf how type
391Allocate an
392.Vt mbuf
393and initialize it to contain a packet header
394and internal data.
395See
396.Fn MGET
397for details.
398.It Fn MCLGET mbuf how
399Allocate and attach an
400.Vt mbuf cluster
401to
402.Fa mbuf .
403If the macro fails, the
404.Dv M_EXT
405flag won't be set in
406.Fa mbuf .
407.It Fn M_ALIGN mbuf len
408Set the pointer
409.Fa mbuf->m_data
410to place an object of the size
411.Fa len
412at the end of the internal data area of
413.Fa mbuf ,
414long word aligned.
415Applicable only if
416.Fa mbuf
417is newly allocated with
418.Fn MGET
419or
420.Fn m_get .
421.It Fn MH_ALIGN mbuf len
422Serves the same purpose as
423.Fn M_ALIGN
424does, but only for
425.Fa mbuf
426newly allocated with
427.Fn MGETHDR
428or
429.Fn m_gethdr ,
430or initialized by
431.Fn m_dup_pkthdr
432or
433.Fn m_move_pkthdr .
434.It Fn M_LEADINGSPACE mbuf
435Returns the number of bytes available before the beginning
436of data in
437.Fa mbuf .
438.It Fn M_TRAILINGSPACE mbuf
439Returns the number of bytes available after the end of data in
440.Fa mbuf .
441.It Fn M_PREPEND mbuf len how
442This macro operates on an
443.Vt mbuf chain .
444It is an optimized wrapper for
445.Fn m_prepend
446that can make use of possible empty space before data
447(e.g. left after trimming of a link-layer header).
448The new
449.Vt mbuf chain
450pointer or
451.Dv NULL
452is in
453.Fa mbuf
454after the call.
455.It Fn M_MOVE_PKTHDR to from
456Using this macro is equivalent to calling
457.Fn m_move_pkthdr to from .
458.It Fn M_WRITABLE mbuf
459This macro will evaluate true if
460.Fa mbuf
461is not marked
462.Dv M_RDONLY
463and if either
464.Fa mbuf
465does not contain external storage or,
466if it does,
467then if the reference count of the storage is not greater than 1.
468The
469.Dv M_RDONLY
470flag can be set in
471.Fa mbuf->m_flags .
472This can be achieved during setup of the external storage,
473by passing the
474.Dv M_RDONLY
475bit as a
476.Fa flags
477argument to the
478.Fn MEXTADD
479macro, or can be directly set in individual
480.Vt mbufs .
481.It Fn MCHTYPE mbuf type
482Change the type of
483.Fa mbuf
484to
485.Fa type .
486This is a relatively expensive operation and should be avoided.
487.El
488.Pp
489The functions are:
490.Bl -ohang -offset indent
491.It Fn m_get how type
492A function version of
493.Fn MGET
494for non-critical paths.
495.It Fn m_getm orig len how type
496Allocate
497.Fa len
498bytes worth of
499.Vt mbufs
500and
501.Vt mbuf clusters
502if necessary and append the resulting allocated
503.Vt mbuf chain
504to the
505.Vt mbuf chain
506.Fa orig ,
507if it is
508.No non- Ns Dv NULL .
509If the allocation fails at any point,
510free whatever was allocated and return
511.Dv NULL .
512If
513.Fa orig
514is
515.No non- Ns Dv NULL ,
516it will not be freed.
517It is possible to use
518.Fn m_getm
519to either append
520.Fa len
521bytes to an existing
522.Vt mbuf
523or
524.Vt mbuf chain
525(for example, one which may be sitting in a pre-allocated ring)
526or to simply perform an all-or-nothing
527.Vt mbuf
528and
529.Vt mbuf cluster
530allocation.
531.It Fn m_gethdr how type
532A function version of
533.Fn MGETHDR
534for non-critical paths.
535.It Fn m_getcl how type flags
536Fetch an
537.Vt mbuf
538with a
539.Vt mbuf cluster
540attached to it.
541If one of the allocations fails, the entire allocation fails.
542This routine is the preferred way of fetching both the
543.Vt mbuf
544and
545.Vt mbuf cluster
546together, as it avoids having to unlock/relock between allocations.
547Returns
548.Dv NULL
549on failure.
550.It Fn m_getclr how type
551Allocate an
552.Vt mbuf
553and zero out the data region.
554.It Fn m_free mbuf
555Frees
556.Vt mbuf .
557.El
558.Pp
559The functions below operate on
560.Vt mbuf chains .
561.Bl -ohang -offset indent
562.It Fn m_freem mbuf
563Free an entire
564.Vt mbuf chain ,
565including any external storage.
566.\"
567.It Fn m_adj mbuf len
568Trim
569.Fa len
570bytes from the head of an
571.Vt mbuf chain
572if
573.Fa len
574is positive, from the tail otherwise.
575.\"
576.It Fn m_prepend mbuf len how
577Allocate a new
578.Vt mbuf
579and prepend it to the
580.Vt mbuf chain ,
581handling
582.Dv M_PKTHDR
583properly.
584.Sy Note :
585It doesn't allocate any
586.Vt mbuf clusters ,
587so
588.Fa len
589must be less than
590.Dv MLEN
591or
592.Dv MHLEN ,
593depending on the
594.Dv M_PKTHDR
595flag setting.
596.\"
597.It Fn m_pullup mbuf len
598Arrange that the first
599.Fa len
600bytes of an
601.Vt mbuf chain
602are contiguous and lie in the data area of
603.Fa mbuf ,
604so they are accessible with
605.Fn mtod mbuf type .
606Return the new
607.Vt mbuf chain
608on success,
609.Dv NULL
610on failure
611(the
612.Vt mbuf chain
613is freed in this case).
614.Sy Note :
615It doesn't allocate any
616.Vt mbuf clusters ,
617so
618.Fa len
619must be less than
620.Dv MHLEN .
621.\"
622.It Fn m_copym mbuf offset len how
623Make a copy of an
624.Vt mbuf chain
625starting
626.Fa offset
627bytes from the beginning, continuing for
628.Fa len
629bytes.
630If
631.Fa len
632is
633.Dv M_COPYALL ,
634copy to the end of the
635.Vt mbuf chain .
636.Sy Note :
637The copy is read-only, because the
638.Vt mbuf clusters
639are not copied, only their reference counts are incremented.
640.\"
641.It Fn m_copypacket mbuf how
642Copy an entire packet including header, which must be present.
643This is an optimized version of the common case
644.Fn m_copym mbuf 0 M_COPYALL how .
645.Sy Note :
646The copy is read-only, because the
647.Vt mbuf clusters
648are not copied, only their reference counts are incremented.
649.\"
650.It Fn m_dup mbuf how
651Copy a packet header
652.Vt mbuf chain
653into a completely new
654.Vt mbuf chain ,
655including copying any
656.Vt mbuf clusters .
657Use this instead of
658.Fn m_copypacket
659when you need a writable copy of an
660.Vt mbuf chain .
661.\"
662.It Fn m_copydata mbuf offset len buf
663Copy data from an
664.Vt mbuf chain
665starting
666.Fa offset
667bytes from the beginning, continuing for
668.Fa len
669bytes, into the indicated buffer
670.Fa buf .
671.\"
672.It Fn m_copyback mbuf offset len buf
673Copy
674.Fa len
675bytes from the buffer
676.Fa buf
677back into the indicated
678.Vt mbuf chain ,
679starting at
680.Fa offset
681bytes from the beginning of the
682.Vt mbuf chain ,
683extending the
684.Vt mbuf chain
685if necessary.
686.Sy Note :
687It doesn't allocate any
688.Vt mbuf clusters ,
689just adds
690.Vt mbufs
691to the
692.Vt mbuf chain .
693It's safe to set
694.Fa offset
695beyond the current
696.Vt mbuf chain
697end: zeroed
698.Vt mbufs
699will be allocated to fill the space.
700.\"
701.It Fn m_length mbuf last
702Return the length of the
703.Vt mbuf chain ,
704and optionally a pointer to the last
705.Vt mbuf .
706.\"
707.It Fn m_dup_pkthdr to from how
708Upon the function's completion, the
709.Vt mbuf
710.Fa to
711will contain an identical copy of
712.Fa from->m_pkthdr
713and the per-packet attributes found in the
714.Vt mbuf chain
715.Fa from .
716The
717.Vt mbuf
718.Fa from
719must have the flag
720.Dv M_PKTHDR
721initially set, and
722.Fa to
723must be empty on entry.
724.\"
725.It Fn m_move_pkthdr to from
726Move
727.Va m_pkthdr
728and the per-packet attributes from the
729.Vt mbuf chain
730.Fa from
731to the
732.Vt mbuf
733.Fa to .
734The
735.Vt mbuf
736.Fa from
737must have the flag
738.Dv M_PKTHDR
739initially set, and
740.Fa to
741must be empty on entry.
742Upon the function's completion,
743.Fa from
744will have the flag
745.Dv M_PKTHDR
746and the per-packet attributes cleared.
747.\"
748.It Fn m_fixhdr mbuf
749Set the packet-header length to the length of the
750.Vt mbuf chain .
751.\"
752.It Fn m_devget buf len offset ifp copy
753Copy data from device local memory pointed to by
754.Fa buf
755to an
756.Vt mbuf chain .
757The copy is done using a specified copy routine
758.Fa copy ,
759or
760.Fn bcopy
761if
762.Fa copy
763is
764.Dv NULL .
765.\"
766.It Fn m_cat m n
767Concatenate
768.Fa n
769to
770.Fa m .
771Both
772.Vt mbuf chains
773must be of the same type.
774.Fa n
775is still valid after the function returns.
776.Sy Note :
777It does not handle
778.Dv M_PKTHDR
779and friends.
780.\"
781.It Fn m_split mbuf len how
782Partition an
783.Vt mbuf chain
784in two pieces, returning the tail:
785all but the first
786.Fa len
787bytes.
788In case of failure, it returns
789.Dv NULL
790and attempts to restore the
791.Vt mbuf chain
792to its original state.
793.\"
794.It Fn m_apply mbuf off len f arg
795Apply a function to an
796.Vt mbuf chain ,
797at offset
798.Fa off ,
799for length
800.Fa len
801bytes.
802Typically used to avoid calls to
803.Fn m_pullup
804which would otherwise be unnecessary or undesirable.
805.Fa arg
806is a convenience argument which is passed to the callback function
807.Fa f .
808.Pp
809Each time
810.Fn f
811is called, it will be passed
812.Fa arg ,
813a pointer to the
814.Fa data
815in the current mbuf, and the length
816.Fa len
817of the data in this mbuf to which the function should be applied.
818.Pp
819The function should return zero to indicate success;
820otherwise, if an error is indicated, then
821.Fn m_apply
822will return the error and stop iterating through the
823.Vt mbuf chain .
824.\"
825.It Fn m_getptr mbuf loc off
826Return a pointer to the mbuf containing the data located at
827.Fa loc
828bytes from the beginning of the
829.Vt mbuf chain .
830The corresponding offset into the mbuf will be stored in
831.Fa *off .
832.It Fn m_defrag m0 how
833Defragment an mbuf chain, returning the shortest possible
834chain of mbufs and clusters.
835If allocation fails and this cannot be completed,
836.Dv NULL
837will be returned and the original chain will be unchanged.
838Upon success, the original chain will be freed and the new
839chain will be returned.
840.Fa how
841should be either
842.Dv M_TRYWAIT
843or
844.Dv M_DONTWAIT ,
845depending on the caller's preference.
846.Pp
847This function is especially useful in network drivers, where
848certain long mbuf chains must be shortened before being added
849to TX descriptor lists.
850.El
851.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
852This section currently applies to TCP/IP only.
853In order to save the host CPU resources, computing checksums is
854offloaded to the network interface hardware if possible.
855The
856.Va m_pkthdr
857member of the leading
858.Vt mbuf
859of a packet contains two fields used for that purpose,
860.Vt int Va csum_flags
861and
862.Vt int Va csum_data .
863The meaning of those fields depends on the direction a packet flows in,
864and on whether the packet is fragmented.
865Henceforth,
866.Va csum_flags
867or
868.Va csum_data
869of a packet
870will denote the corresponding field of the
871.Va m_pkthdr
872member of the leading
873.Vt mbuf
874in the
875.Vt mbuf chain
876containing the packet.
877.Pp
878On output, checksum offloading is attempted after the outgoing
879interface has been determined for a packet.
880The interface-specific field
881.Va ifnet.if_data.ifi_hwassist
882(see
883.Xr ifnet 9 )
884is consulted for the capabilities of the interface to assist in
885computing checksums.
886The
887.Va csum_flags
888field of the packet header is set to indicate which actions the interface
889is supposed to perform on it.
890The actions unsupported by the network interface are done in
891software prior to passing the packet down to the interface driver;
892such actions will never be requested through
893.Va csum_flags .
894.Pp
895The flags demanding a particular action from an interface are as follows:
896.Bl -tag -width ".Dv CSUM_TCP" -offset indent
897.It Dv CSUM_IP
898The IP header checksum is to be computed and stored in the
899corresponding field of the packet.
900The hardware is expected to know the format of an IP header
901to determine the offset of the IP checksum field.
902.It Dv CSUM_TCP
903The TCP checksum is to be computed.
904(See below.)
905.It Dv CSUM_UDP
906The UDP checksum is to be computed.
907(See below.)
908.El
909.Pp
910Should a TCP or UDP checksum be offloaded to the hardware,
911the field
912.Va csum_data
913will contain the byte offset of the checksum field relative to the
914end of the IP header.
915In this case, the checksum field will be initially
916set by the TCP/IP module to the checksum of the pseudo header
917defined by the TCP and UDP specifications.
918.Pp
919For outbound packets which have been fragmented
920by the host CPU, the following will also be true,
921regardless of the checksum flag settings:
922.Bl -bullet -offset indent
923.It
924all fragments will have the flag
925.Dv M_FRAG
926set in their
927.Va m_flags
928field;
929.It
930the first and the last fragments in the chain will have
931.Dv M_FIRSTFRAG
932or
933.Dv M_LASTFRAG
934set in their
935.Va m_flags ,
936correspondingly;
937.It
938the first fragment in the chain will have the total number
939of fragments contained in its
940.Va csum_data
941field.
942.El
943.Pp
944The last rule for fragmented packets takes precedence over the one
945for a TCP or UDP checksum.
946Nevertheless, offloading a TCP or UDP checksum is possible for a
947fragmented packet if the flag
948.Dv CSUM_IP_FRAGS
949is set in the field
950.Va ifnet.if_data.ifi_hwassist
951associated with the network interface.
952However, in this case the interface is expected to figure out
953the location of the checksum field within the sequence of fragments
954by itself because
955.Va csum_data
956contains a fragment count instead of a checksum offset value.
957.Pp
958On input, an interface indicates the actions it has performed
959on a packet by setting one or more of the following flags in
960.Va csum_flags
961associated with the packet:
962.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
963.It Dv CSUM_IP_CHECKED
964The IP header checksum has been computed.
965.It Dv CSUM_IP_VALID
966The IP header has a valid checksum.
967This flag can appear only in combination with
968.Dv CSUM_IP_CHECKED .
969.It Dv CSUM_DATA_VALID
970The checksum of the data portion of the IP packet has been computed
971and stored in the field
972.Va csum_data
973in network byte order.
974.It Dv CSUM_PSEUDO_HDR
975Can be set only along with
976.Dv CSUM_DATA_VALID
977to indicate that the IP data checksum found in
978.Va csum_data
979allows for the pseudo header defined by the TCP and UDP specifications.
980Otherwise the checksum of the pseudo header must be calculated by
981the host CPU and added to
982.Va csum_data
983to obtain the final checksum to be used for TCP or UDP validation purposes.
984.El
985.Pp
986If a particular network interface just indicates success or
987failure of TCP or UDP checksum validation without returning
988the exact value of the checksum to the host CPU, its driver can mark
989.Dv CSUM_DATA_VALID
990and
991.Dv CSUM_PSEUDO_HDR
992in
993.Va csum_flags ,
994and set
995.Va csum_data
996to
997.Li 0xFFFF
998hexadecimal to indicate a valid checksum.
999It is a peculiarity of the algorithm used that the Internet checksum
1000calculated over any valid packet will be
1001.Li 0xFFFF
1002as long as the original checksum field is included.
1003.Pp
1004For inbound packets which are IP fragments, all
1005.Va csum_data
1006fields will be summed during reassembly to obtain the final checksum
1007value passed to an upper layer in the
1008.Va csum_data
1009field of the reassembled packet.
1010The
1011.Va csum_flags
1012fields of all fragments will be consolidated using logical AND
1013to obtain the final value for
1014.Va csum_flags .
1015Thus, in order to successfully
1016offload checksum computation for fragmented data,
1017all fragments should have the same value of
1018.Va csum_flags .
1019.Sh STRESS TESTING
1020When running a kernel compiled with the option
1021.Dv MBUF_STRESS_TEST ,
1022the following
1023.Xr sysctl 8 Ns
1024-controlled options may be used to create
1025various failure/extreme cases for testing of network drivers
1026and other parts of the kernel that rely on
1027.Vt mbufs .
1028.Bl -tag -width indent
1029.It Va net.inet.ip.mbuf_frag_size
1030Causes
1031.Fn ip_output
1032to fragment outgoing
1033.Vt mbuf chains
1034into fragments of the specified size.
1035Setting this variable to 1 is an excellent way to
1036test the long
1037.Vt mbuf chain
1038handling ability of network drivers.
1039.It Va kern.ipc.m_defragrandomfailures
1040Causes the function
1041.Fn m_defrag
1042to randomly fail, returning
1043.Dv NULL .
1044Any piece of code which uses
1045.Fn m_defrag
1046should be tested with this feature.
1047.El
1048.Sh RETURN VALUES
1049See above.
1050.Sh SEE ALSO
1051.Xr ifnet 9
1052.Sh HISTORY
1053.\" Please correct me if I'm wrong
1054.Vt Mbufs
1055appeared in an early version of
1056.Bx .
1057Besides being used for network packets, they were used
1058to store various dynamic structures, such as routing table
1059entries, interface addresses, protocol control blocks, etc.
1060.Sh AUTHORS
1061The original
1062.Nm
1063man page was written by Yar Tikhiy.
1064