xref: /freebsd/share/man/man9/mbuf.9 (revision 262e143bd46171a6415a5b28af260a5efa2a3db8)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd November 18, 2005
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
55.\"
56.Ss Mbuf utility macros
57.Fn mtod "struct mbuf *mbuf" "type"
58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
60.Ft int
61.Fn M_LEADINGSPACE "struct mbuf *mbuf"
62.Ft int
63.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
67.Ft int
68.Fn M_WRITABLE "struct mbuf *mbuf"
69.\"
70.Ss Mbuf allocation functions
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
75.Ft struct mbuf *
76.Fn m_getcl "int how" "short type" "int flags"
77.Ft struct mbuf *
78.Fn m_getclr "int how" "int type"
79.Ft struct mbuf *
80.Fn m_gethdr "int how" "int type"
81.Ft struct mbuf *
82.Fn m_free "struct mbuf *mbuf"
83.Ft void
84.Fn m_freem "struct mbuf *mbuf"
85.\"
86.Ss Mbuf utility functions
87.Ft void
88.Fn m_adj "struct mbuf *mbuf" "int len"
89.Ft void
90.Fn m_align "struct mbuf *mbuf" "int len"
91.Ft int
92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
101.Ft struct mbuf *
102.Fn m_copypacket "struct mbuf *mbuf" "int how"
103.Ft struct mbuf *
104.Fn m_dup "struct mbuf *mbuf" "int how"
105.Ft void
106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft void
108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft struct mbuf *
110.Fo m_devget
111.Fa "char *buf"
112.Fa "int len"
113.Fa "int offset"
114.Fa "struct ifnet *ifp"
115.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
116.Fc
117.Ft void
118.Fn m_cat "struct mbuf *m" "struct mbuf *n"
119.Ft u_int
120.Fn m_fixhdr "struct mbuf *mbuf"
121.Ft int
122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft u_int
126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
127.Ft struct mbuf *
128.Fn m_split "struct mbuf *mbuf" "int len" "int how"
129.Ft int
130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
131.Ft struct mbuf *
132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
133.Ft struct mbuf *
134.Fn m_defrag "struct mbuf *m0" "int how"
135.\"
136.Sh DESCRIPTION
137An
138.Vt mbuf
139is a basic unit of memory management in the kernel IPC subsystem.
140Network packets and socket buffers are stored in
141.Vt mbufs .
142A network packet may span multiple
143.Vt mbufs
144arranged into a
145.Vt mbuf chain
146(linked list),
147which allows adding or trimming
148network headers with little overhead.
149.Pp
150While a developer should not bother with
151.Vt mbuf
152internals without serious
153reason in order to avoid incompatibilities with future changes, it
154is useful to understand the general structure of an
155.Vt mbuf .
156.Pp
157An
158.Vt mbuf
159consists of a variable-sized header and a small internal
160buffer for data.
161The total size of an
162.Vt mbuf ,
163.Dv MSIZE ,
164is a constant defined in
165.In sys/param.h .
166The
167.Vt mbuf
168header includes:
169.Pp
170.Bl -tag -width "m_nextpkt" -offset indent
171.It Va m_next
172.Pq Vt struct mbuf *
173A pointer to the next
174.Vt mbuf
175in the
176.Vt mbuf chain .
177.It Va m_nextpkt
178.Pq Vt struct mbuf *
179A pointer to the next
180.Vt mbuf chain
181in the queue.
182.It Va m_data
183.Pq Vt caddr_t
184A pointer to data attached to this
185.Vt mbuf .
186.It Va m_len
187.Pq Vt int
188The length of the data.
189.It Va m_type
190.Pq Vt short
191The type of the data.
192.It Va m_flags
193.Pq Vt int
194The
195.Vt mbuf
196flags.
197.El
198.Pp
199The
200.Vt mbuf
201flag bits are defined as follows:
202.Bd -literal
203/* mbuf flags */
204#define	M_EXT		0x0001	/* has associated external storage */
205#define	M_PKTHDR	0x0002	/* start of record */
206#define	M_EOR		0x0004	/* end of record */
207#define	M_RDONLY	0x0008	/* associated data marked read-only */
208#define	M_PROTO1	0x0010	/* protocol-specific */
209#define	M_PROTO2	0x0020 	/* protocol-specific */
210#define	M_PROTO3	0x0040	/* protocol-specific */
211#define	M_PROTO4	0x0080	/* protocol-specific */
212#define	M_PROTO5	0x0100	/* protocol-specific */
213#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
214#define	M_FREELIST	0x8000	/* mbuf is on the free list */
215
216/* mbuf pkthdr flags (also stored in m_flags) */
217#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
218#define	M_MCAST		0x0400	/* send/received as link-level multicast */
219#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
220#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
221#define	M_LASTFRAG	0x2000	/* packet is last fragment */
222.Ed
223.Pp
224The available
225.Vt mbuf
226types are defined as follows:
227.Bd -literal
228/* mbuf types */
229#define	MT_DATA		1	/* dynamic (data) allocation */
230#define	MT_HEADER	2	/* packet header */
231#define	MT_SONAME	8	/* socket name */
232#define	MT_FTABLE	11	/* fragment reassembly header */
233#define	MT_CONTROL	14	/* extra-data protocol message */
234#define	MT_OOBDATA	15	/* expedited data */
235.Ed
236.Pp
237If the
238.Dv M_PKTHDR
239flag is set, a
240.Vt struct pkthdr Va m_pkthdr
241is added to the
242.Vt mbuf
243header.
244It contains a pointer to the interface
245the packet has been received from
246.Pq Vt struct ifnet Va *rcvif ,
247and the total packet length
248.Pq Vt int Va len .
249Optionally, it may also contain an attached list of packet tags
250.Pq Vt "struct m_tag" .
251See
252.Xr mbuf_tags 9
253for details.
254Fields used in offloading checksum calculation to the hardware are kept in
255.Va m_pkthdr
256as well.
257See
258.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
259for details.
260.Pp
261If small enough, data is stored in the internal data buffer of an
262.Vt mbuf .
263If the data is sufficiently large, another
264.Vt mbuf
265may be added to the
266.Vt mbuf chain ,
267or external storage may be associated with the
268.Vt mbuf .
269.Dv MHLEN
270bytes of data can fit into an
271.Vt mbuf
272with the
273.Dv M_PKTHDR
274flag set,
275.Dv MLEN
276bytes can otherwise.
277.Pp
278If external storage is being associated with an
279.Vt mbuf ,
280the
281.Va m_ext
282header is added at the cost of losing the internal data buffer.
283It includes a pointer to external storage, the size of the storage,
284a pointer to a function used for freeing the storage,
285a pointer to an optional argument that can be passed to the function,
286and a pointer to a reference counter.
287An
288.Vt mbuf
289using external storage has the
290.Dv M_EXT
291flag set.
292.Pp
293The system supplies a macro for allocating the desired external storage
294buffer,
295.Dv MEXTADD .
296.Pp
297The allocation and management of the reference counter is handled by the
298subsystem.
299.Pp
300The system also supplies a default type of external storage buffer called an
301.Vt mbuf cluster .
302.Vt Mbuf clusters
303can be allocated and configured with the use of the
304.Dv MCLGET
305macro.
306Each
307.Vt mbuf cluster
308is
309.Dv MCLBYTES
310in size, where MCLBYTES is a machine-dependent constant.
311The system defines an advisory macro
312.Dv MINCLSIZE ,
313which is the smallest amount of data to put into an
314.Vt mbuf cluster .
315It is equal to the sum of
316.Dv MLEN
317and
318.Dv MHLEN .
319It is typically preferable to store data into the data region of an
320.Vt mbuf ,
321if size permits, as opposed to allocating a separate
322.Vt mbuf cluster
323to hold the same data.
324.\"
325.Ss Macros and Functions
326There are numerous predefined macros and functions that provide the
327developer with common utilities.
328.\"
329.Bl -ohang -offset indent
330.It Fn mtod mbuf type
331Convert an
332.Fa mbuf
333pointer to a data pointer.
334The macro expands to the data pointer cast to the pointer of the specified
335.Fa type .
336.Sy Note :
337It is advisable to ensure that there is enough contiguous data in
338.Fa mbuf .
339See
340.Fn m_pullup
341for details.
342.It Fn MGET mbuf how type
343Allocate an
344.Vt mbuf
345and initialize it to contain internal data.
346.Fa mbuf
347will point to the allocated
348.Vt mbuf
349on success, or be set to
350.Dv NULL
351on failure.
352The
353.Fa how
354argument is to be set to
355.Dv M_TRYWAIT
356or
357.Dv M_DONTWAIT .
358It specifies whether the caller is willing to block if necessary.
359If
360.Fa how
361is set to
362.Dv M_TRYWAIT ,
363a failed allocation will result in the caller being put
364to sleep for a designated
365kern.ipc.mbuf_wait
366.Xr ( sysctl 8
367tunable)
368number of ticks.
369A number of other functions and macros related to
370.Vt mbufs
371have the same argument because they may
372at some point need to allocate new
373.Vt mbufs .
374.Pp
375Programmers should be careful not to confuse the
376.Vt mbuf
377allocation flag
378.Dv M_DONTWAIT
379with the
380.Xr malloc 9
381allocation flag,
382.Dv M_NOWAIT .
383They are not the same.
384.It Fn MGETHDR mbuf how type
385Allocate an
386.Vt mbuf
387and initialize it to contain a packet header
388and internal data.
389See
390.Fn MGET
391for details.
392.It Fn MCLGET mbuf how
393Allocate and attach an
394.Vt mbuf cluster
395to
396.Fa mbuf .
397If the macro fails, the
398.Dv M_EXT
399flag will not be set in
400.Fa mbuf .
401.It Fn M_ALIGN mbuf len
402Set the pointer
403.Fa mbuf->m_data
404to place an object of the size
405.Fa len
406at the end of the internal data area of
407.Fa mbuf ,
408long word aligned.
409Applicable only if
410.Fa mbuf
411is newly allocated with
412.Fn MGET
413or
414.Fn m_get .
415.It Fn MH_ALIGN mbuf len
416Serves the same purpose as
417.Fn M_ALIGN
418does, but only for
419.Fa mbuf
420newly allocated with
421.Fn MGETHDR
422or
423.Fn m_gethdr ,
424or initialized by
425.Fn m_dup_pkthdr
426or
427.Fn m_move_pkthdr .
428.It Fn m_align mbuf len
429Serves the same purpose as
430.Fn M_ALIGN
431but handles any type of mbuf.
432.It Fn M_LEADINGSPACE mbuf
433Returns the number of bytes available before the beginning
434of data in
435.Fa mbuf .
436.It Fn M_TRAILINGSPACE mbuf
437Returns the number of bytes available after the end of data in
438.Fa mbuf .
439.It Fn M_PREPEND mbuf len how
440This macro operates on an
441.Vt mbuf chain .
442It is an optimized wrapper for
443.Fn m_prepend
444that can make use of possible empty space before data
445(e.g.\& left after trimming of a link-layer header).
446The new
447.Vt mbuf chain
448pointer or
449.Dv NULL
450is in
451.Fa mbuf
452after the call.
453.It Fn M_MOVE_PKTHDR to from
454Using this macro is equivalent to calling
455.Fn m_move_pkthdr to from .
456.It Fn M_WRITABLE mbuf
457This macro will evaluate true if
458.Fa mbuf
459is not marked
460.Dv M_RDONLY
461and if either
462.Fa mbuf
463does not contain external storage or,
464if it does,
465then if the reference count of the storage is not greater than 1.
466The
467.Dv M_RDONLY
468flag can be set in
469.Fa mbuf->m_flags .
470This can be achieved during setup of the external storage,
471by passing the
472.Dv M_RDONLY
473bit as a
474.Fa flags
475argument to the
476.Fn MEXTADD
477macro, or can be directly set in individual
478.Vt mbufs .
479.It Fn MCHTYPE mbuf type
480Change the type of
481.Fa mbuf
482to
483.Fa type .
484This is a relatively expensive operation and should be avoided.
485.El
486.Pp
487The functions are:
488.Bl -ohang -offset indent
489.It Fn m_get how type
490A function version of
491.Fn MGET
492for non-critical paths.
493.It Fn m_getm orig len how type
494Allocate
495.Fa len
496bytes worth of
497.Vt mbufs
498and
499.Vt mbuf clusters
500if necessary and append the resulting allocated
501.Vt mbuf chain
502to the
503.Vt mbuf chain
504.Fa orig ,
505if it is
506.No non- Ns Dv NULL .
507If the allocation fails at any point,
508free whatever was allocated and return
509.Dv NULL .
510If
511.Fa orig
512is
513.No non- Ns Dv NULL ,
514it will not be freed.
515It is possible to use
516.Fn m_getm
517to either append
518.Fa len
519bytes to an existing
520.Vt mbuf
521or
522.Vt mbuf chain
523(for example, one which may be sitting in a pre-allocated ring)
524or to simply perform an all-or-nothing
525.Vt mbuf
526and
527.Vt mbuf cluster
528allocation.
529.It Fn m_gethdr how type
530A function version of
531.Fn MGETHDR
532for non-critical paths.
533.It Fn m_getcl how type flags
534Fetch an
535.Vt mbuf
536with a
537.Vt mbuf cluster
538attached to it.
539If one of the allocations fails, the entire allocation fails.
540This routine is the preferred way of fetching both the
541.Vt mbuf
542and
543.Vt mbuf cluster
544together, as it avoids having to unlock/relock between allocations.
545Returns
546.Dv NULL
547on failure.
548.It Fn m_getclr how type
549Allocate an
550.Vt mbuf
551and zero out the data region.
552.It Fn m_free mbuf
553Frees
554.Vt mbuf .
555Returns
556.Va m_next
557of the freed
558.Vt mbuf .
559.El
560.Pp
561The functions below operate on
562.Vt mbuf chains .
563.Bl -ohang -offset indent
564.It Fn m_freem mbuf
565Free an entire
566.Vt mbuf chain ,
567including any external storage.
568.\"
569.It Fn m_adj mbuf len
570Trim
571.Fa len
572bytes from the head of an
573.Vt mbuf chain
574if
575.Fa len
576is positive, from the tail otherwise.
577.\"
578.It Fn m_append mbuf len cp
579Append
580.Fa len
581bytes of data
582.Fa cp
583to the
584.Vt mbuf chain .
585Extend the mbuf chain if the new data does not fit in
586existing space.
587.\"
588.It Fn m_prepend mbuf len how
589Allocate a new
590.Vt mbuf
591and prepend it to the
592.Vt mbuf chain ,
593handling
594.Dv M_PKTHDR
595properly.
596.Sy Note :
597It does not allocate any
598.Vt mbuf clusters ,
599so
600.Fa len
601must be less than
602.Dv MLEN
603or
604.Dv MHLEN ,
605depending on the
606.Dv M_PKTHDR
607flag setting.
608.\"
609.It Fn m_copyup mbuf len dstoff
610Similar to
611.Fn m_pullup
612but copies
613.Fa len
614bytes of data into a new mbuf at
615.Fa dstoff
616bytes into the mbuf.
617The
618.Fa dstoff
619argument aligns the data and leaves room for a link layer header.
620Returns the new
621.Vt mbuf chain
622on success,
623and frees the
624.Vt mbuf chain
625and returns
626.Dv NULL
627on failure.
628.Sy Note :
629The function does not allocate
630.Vt mbuf clusters ,
631so
632.Fa len + dstoff
633must be less than
634.Dv MHLEN .
635.\"
636.It Fn m_pullup mbuf len
637Arrange that the first
638.Fa len
639bytes of an
640.Vt mbuf chain
641are contiguous and lie in the data area of
642.Fa mbuf ,
643so they are accessible with
644.Fn mtod mbuf type .
645Return the new
646.Vt mbuf chain
647on success,
648.Dv NULL
649on failure
650(the
651.Vt mbuf chain
652is freed in this case).
653.Sy Note :
654It does not allocate any
655.Vt mbuf clusters ,
656so
657.Fa len
658must be less than
659.Dv MHLEN .
660.\"
661.It Fn m_copym mbuf offset len how
662Make a copy of an
663.Vt mbuf chain
664starting
665.Fa offset
666bytes from the beginning, continuing for
667.Fa len
668bytes.
669If
670.Fa len
671is
672.Dv M_COPYALL ,
673copy to the end of the
674.Vt mbuf chain .
675.Sy Note :
676The copy is read-only, because the
677.Vt mbuf clusters
678are not copied, only their reference counts are incremented.
679.\"
680.It Fn m_copypacket mbuf how
681Copy an entire packet including header, which must be present.
682This is an optimized version of the common case
683.Fn m_copym mbuf 0 M_COPYALL how .
684.Sy Note :
685The copy is read-only, because the
686.Vt mbuf clusters
687are not copied, only their reference counts are incremented.
688.\"
689.It Fn m_dup mbuf how
690Copy a packet header
691.Vt mbuf chain
692into a completely new
693.Vt mbuf chain ,
694including copying any
695.Vt mbuf clusters .
696Use this instead of
697.Fn m_copypacket
698when you need a writable copy of an
699.Vt mbuf chain .
700.\"
701.It Fn m_copydata mbuf offset len buf
702Copy data from an
703.Vt mbuf chain
704starting
705.Fa offset
706bytes from the beginning, continuing for
707.Fa len
708bytes, into the indicated buffer
709.Fa buf .
710.\"
711.It Fn m_copyback mbuf offset len buf
712Copy
713.Fa len
714bytes from the buffer
715.Fa buf
716back into the indicated
717.Vt mbuf chain ,
718starting at
719.Fa offset
720bytes from the beginning of the
721.Vt mbuf chain ,
722extending the
723.Vt mbuf chain
724if necessary.
725.Sy Note :
726It does not allocate any
727.Vt mbuf clusters ,
728just adds
729.Vt mbufs
730to the
731.Vt mbuf chain .
732It is safe to set
733.Fa offset
734beyond the current
735.Vt mbuf chain
736end: zeroed
737.Vt mbufs
738will be allocated to fill the space.
739.\"
740.It Fn m_length mbuf last
741Return the length of the
742.Vt mbuf chain ,
743and optionally a pointer to the last
744.Vt mbuf .
745.\"
746.It Fn m_dup_pkthdr to from how
747Upon the function's completion, the
748.Vt mbuf
749.Fa to
750will contain an identical copy of
751.Fa from->m_pkthdr
752and the per-packet attributes found in the
753.Vt mbuf chain
754.Fa from .
755The
756.Vt mbuf
757.Fa from
758must have the flag
759.Dv M_PKTHDR
760initially set, and
761.Fa to
762must be empty on entry.
763.\"
764.It Fn m_move_pkthdr to from
765Move
766.Va m_pkthdr
767and the per-packet attributes from the
768.Vt mbuf chain
769.Fa from
770to the
771.Vt mbuf
772.Fa to .
773The
774.Vt mbuf
775.Fa from
776must have the flag
777.Dv M_PKTHDR
778initially set, and
779.Fa to
780must be empty on entry.
781Upon the function's completion,
782.Fa from
783will have the flag
784.Dv M_PKTHDR
785and the per-packet attributes cleared.
786.\"
787.It Fn m_fixhdr mbuf
788Set the packet-header length to the length of the
789.Vt mbuf chain .
790.\"
791.It Fn m_devget buf len offset ifp copy
792Copy data from a device local memory pointed to by
793.Fa buf
794to an
795.Vt mbuf chain .
796The copy is done using a specified copy routine
797.Fa copy ,
798or
799.Fn bcopy
800if
801.Fa copy
802is
803.Dv NULL .
804.\"
805.It Fn m_cat m n
806Concatenate
807.Fa n
808to
809.Fa m .
810Both
811.Vt mbuf chains
812must be of the same type.
813.Fa n
814is still valid after the function returns.
815.Sy Note :
816It does not handle
817.Dv M_PKTHDR
818and friends.
819.\"
820.It Fn m_split mbuf len how
821Partition an
822.Vt mbuf chain
823in two pieces, returning the tail:
824all but the first
825.Fa len
826bytes.
827In case of failure, it returns
828.Dv NULL
829and attempts to restore the
830.Vt mbuf chain
831to its original state.
832.\"
833.It Fn m_apply mbuf off len f arg
834Apply a function to an
835.Vt mbuf chain ,
836at offset
837.Fa off ,
838for length
839.Fa len
840bytes.
841Typically used to avoid calls to
842.Fn m_pullup
843which would otherwise be unnecessary or undesirable.
844.Fa arg
845is a convenience argument which is passed to the callback function
846.Fa f .
847.Pp
848Each time
849.Fn f
850is called, it will be passed
851.Fa arg ,
852a pointer to the
853.Fa data
854in the current mbuf, and the length
855.Fa len
856of the data in this mbuf to which the function should be applied.
857.Pp
858The function should return zero to indicate success;
859otherwise, if an error is indicated, then
860.Fn m_apply
861will return the error and stop iterating through the
862.Vt mbuf chain .
863.\"
864.It Fn m_getptr mbuf loc off
865Return a pointer to the mbuf containing the data located at
866.Fa loc
867bytes from the beginning of the
868.Vt mbuf chain .
869The corresponding offset into the mbuf will be stored in
870.Fa *off .
871.It Fn m_defrag m0 how
872Defragment an mbuf chain, returning the shortest possible
873chain of mbufs and clusters.
874If allocation fails and this cannot be completed,
875.Dv NULL
876will be returned and the original chain will be unchanged.
877Upon success, the original chain will be freed and the new
878chain will be returned.
879.Fa how
880should be either
881.Dv M_TRYWAIT
882or
883.Dv M_DONTWAIT ,
884depending on the caller's preference.
885.Pp
886This function is especially useful in network drivers, where
887certain long mbuf chains must be shortened before being added
888to TX descriptor lists.
889.El
890.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
891This section currently applies to TCP/IP only.
892In order to save the host CPU resources, computing checksums is
893offloaded to the network interface hardware if possible.
894The
895.Va m_pkthdr
896member of the leading
897.Vt mbuf
898of a packet contains two fields used for that purpose,
899.Vt int Va csum_flags
900and
901.Vt int Va csum_data .
902The meaning of those fields depends on the direction a packet flows in,
903and on whether the packet is fragmented.
904Henceforth,
905.Va csum_flags
906or
907.Va csum_data
908of a packet
909will denote the corresponding field of the
910.Va m_pkthdr
911member of the leading
912.Vt mbuf
913in the
914.Vt mbuf chain
915containing the packet.
916.Pp
917On output, checksum offloading is attempted after the outgoing
918interface has been determined for a packet.
919The interface-specific field
920.Va ifnet.if_data.ifi_hwassist
921(see
922.Xr ifnet 9 )
923is consulted for the capabilities of the interface to assist in
924computing checksums.
925The
926.Va csum_flags
927field of the packet header is set to indicate which actions the interface
928is supposed to perform on it.
929The actions unsupported by the network interface are done in the
930software prior to passing the packet down to the interface driver;
931such actions will never be requested through
932.Va csum_flags .
933.Pp
934The flags demanding a particular action from an interface are as follows:
935.Bl -tag -width ".Dv CSUM_TCP" -offset indent
936.It Dv CSUM_IP
937The IP header checksum is to be computed and stored in the
938corresponding field of the packet.
939The hardware is expected to know the format of an IP header
940to determine the offset of the IP checksum field.
941.It Dv CSUM_TCP
942The TCP checksum is to be computed.
943(See below.)
944.It Dv CSUM_UDP
945The UDP checksum is to be computed.
946(See below.)
947.El
948.Pp
949Should a TCP or UDP checksum be offloaded to the hardware,
950the field
951.Va csum_data
952will contain the byte offset of the checksum field relative to the
953end of the IP header.
954In this case, the checksum field will be initially
955set by the TCP/IP module to the checksum of the pseudo header
956defined by the TCP and UDP specifications.
957.Pp
958For outbound packets which have been fragmented
959by the host CPU, the following will also be true,
960regardless of the checksum flag settings:
961.Bl -bullet -offset indent
962.It
963all fragments will have the flag
964.Dv M_FRAG
965set in their
966.Va m_flags
967field;
968.It
969the first and the last fragments in the chain will have
970.Dv M_FIRSTFRAG
971or
972.Dv M_LASTFRAG
973set in their
974.Va m_flags ,
975correspondingly;
976.It
977the first fragment in the chain will have the total number
978of fragments contained in its
979.Va csum_data
980field.
981.El
982.Pp
983The last rule for fragmented packets takes precedence over the one
984for a TCP or UDP checksum.
985Nevertheless, offloading a TCP or UDP checksum is possible for a
986fragmented packet if the flag
987.Dv CSUM_IP_FRAGS
988is set in the field
989.Va ifnet.if_data.ifi_hwassist
990associated with the network interface.
991However, in this case the interface is expected to figure out
992the location of the checksum field within the sequence of fragments
993by itself because
994.Va csum_data
995contains a fragment count instead of a checksum offset value.
996.Pp
997On input, an interface indicates the actions it has performed
998on a packet by setting one or more of the following flags in
999.Va csum_flags
1000associated with the packet:
1001.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1002.It Dv CSUM_IP_CHECKED
1003The IP header checksum has been computed.
1004.It Dv CSUM_IP_VALID
1005The IP header has a valid checksum.
1006This flag can appear only in combination with
1007.Dv CSUM_IP_CHECKED .
1008.It Dv CSUM_DATA_VALID
1009The checksum of the data portion of the IP packet has been computed
1010and stored in the field
1011.Va csum_data
1012in network byte order.
1013.It Dv CSUM_PSEUDO_HDR
1014Can be set only along with
1015.Dv CSUM_DATA_VALID
1016to indicate that the IP data checksum found in
1017.Va csum_data
1018allows for the pseudo header defined by the TCP and UDP specifications.
1019Otherwise the checksum of the pseudo header must be calculated by
1020the host CPU and added to
1021.Va csum_data
1022to obtain the final checksum to be used for TCP or UDP validation purposes.
1023.El
1024.Pp
1025If a particular network interface just indicates success or
1026failure of TCP or UDP checksum validation without returning
1027the exact value of the checksum to the host CPU, its driver can mark
1028.Dv CSUM_DATA_VALID
1029and
1030.Dv CSUM_PSEUDO_HDR
1031in
1032.Va csum_flags ,
1033and set
1034.Va csum_data
1035to
1036.Li 0xFFFF
1037hexadecimal to indicate a valid checksum.
1038It is a peculiarity of the algorithm used that the Internet checksum
1039calculated over any valid packet will be
1040.Li 0xFFFF
1041as long as the original checksum field is included.
1042.Pp
1043For inbound packets which are IP fragments, all
1044.Va csum_data
1045fields will be summed during reassembly to obtain the final checksum
1046value passed to an upper layer in the
1047.Va csum_data
1048field of the reassembled packet.
1049The
1050.Va csum_flags
1051fields of all fragments will be consolidated using logical AND
1052to obtain the final value for
1053.Va csum_flags .
1054Thus, in order to successfully
1055offload checksum computation for fragmented data,
1056all fragments should have the same value of
1057.Va csum_flags .
1058.Sh STRESS TESTING
1059When running a kernel compiled with the option
1060.Dv MBUF_STRESS_TEST ,
1061the following
1062.Xr sysctl 8 Ns
1063-controlled options may be used to create
1064various failure/extreme cases for testing of network drivers
1065and other parts of the kernel that rely on
1066.Vt mbufs .
1067.Bl -tag -width ident
1068.It Va net.inet.ip.mbuf_frag_size
1069Causes
1070.Fn ip_output
1071to fragment outgoing
1072.Vt mbuf chains
1073into fragments of the specified size.
1074Setting this variable to 1 is an excellent way to
1075test the long
1076.Vt mbuf chain
1077handling ability of network drivers.
1078.It Va kern.ipc.m_defragrandomfailures
1079Causes the function
1080.Fn m_defrag
1081to randomly fail, returning
1082.Dv NULL .
1083Any piece of code which uses
1084.Fn m_defrag
1085should be tested with this feature.
1086.El
1087.Sh RETURN VALUES
1088See above.
1089.Sh SEE ALSO
1090.Xr ifnet 9 ,
1091.Xr mbuf_tags 9
1092.Sh HISTORY
1093.\" Please correct me if I'm wrong
1094.Vt Mbufs
1095appeared in an early version of
1096.Bx .
1097Besides being used for network packets, they were used
1098to store various dynamic structures, such as routing table
1099entries, interface addresses, protocol control blocks, etc.
1100.Sh AUTHORS
1101The original
1102.Nm
1103manual page was written by Yar Tikhiy.
1104