xref: /freebsd/share/man/man9/mbuf.9 (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd February 26, 2007
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
55.\"
56.Ss Mbuf utility macros
57.Fn mtod "struct mbuf *mbuf" "type"
58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
60.Ft int
61.Fn M_LEADINGSPACE "struct mbuf *mbuf"
62.Ft int
63.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
67.Ft int
68.Fn M_WRITABLE "struct mbuf *mbuf"
69.\"
70.Ss Mbuf allocation functions
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
75.Ft struct mbuf *
76.Fn m_getcl "int how" "short type" "int flags"
77.Ft struct mbuf *
78.Fn m_getclr "int how" "int type"
79.Ft struct mbuf *
80.Fn m_gethdr "int how" "int type"
81.Ft struct mbuf *
82.Fn m_free "struct mbuf *mbuf"
83.Ft void
84.Fn m_freem "struct mbuf *mbuf"
85.\"
86.Ss Mbuf utility functions
87.Ft void
88.Fn m_adj "struct mbuf *mbuf" "int len"
89.Ft void
90.Fn m_align "struct mbuf *mbuf" "int len"
91.Ft int
92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp"
101.Ft struct mbuf *
102.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
103.Ft struct mbuf *
104.Fn m_copypacket "struct mbuf *mbuf" "int how"
105.Ft struct mbuf *
106.Fn m_dup "struct mbuf *mbuf" "int how"
107.Ft void
108.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft void
110.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
111.Ft struct mbuf *
112.Fo m_devget
113.Fa "char *buf"
114.Fa "int len"
115.Fa "int offset"
116.Fa "struct ifnet *ifp"
117.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
118.Fc
119.Ft void
120.Fn m_cat "struct mbuf *m" "struct mbuf *n"
121.Ft u_int
122.Fn m_fixhdr "struct mbuf *mbuf"
123.Ft int
124.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
125.Ft void
126.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
127.Ft u_int
128.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
129.Ft struct mbuf *
130.Fn m_split "struct mbuf *mbuf" "int len" "int how"
131.Ft int
132.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
133.Ft struct mbuf *
134.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
135.Ft struct mbuf *
136.Fn m_defrag "struct mbuf *m0" "int how"
137.Ft struct mbuf *
138.Fn m_unshare "struct mbuf *m0" "int how"
139.\"
140.Sh DESCRIPTION
141An
142.Vt mbuf
143is a basic unit of memory management in the kernel IPC subsystem.
144Network packets and socket buffers are stored in
145.Vt mbufs .
146A network packet may span multiple
147.Vt mbufs
148arranged into a
149.Vt mbuf chain
150(linked list),
151which allows adding or trimming
152network headers with little overhead.
153.Pp
154While a developer should not bother with
155.Vt mbuf
156internals without serious
157reason in order to avoid incompatibilities with future changes, it
158is useful to understand the general structure of an
159.Vt mbuf .
160.Pp
161An
162.Vt mbuf
163consists of a variable-sized header and a small internal
164buffer for data.
165The total size of an
166.Vt mbuf ,
167.Dv MSIZE ,
168is a constant defined in
169.In sys/param.h .
170The
171.Vt mbuf
172header includes:
173.Pp
174.Bl -tag -width "m_nextpkt" -offset indent
175.It Va m_next
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf
179in the
180.Vt mbuf chain .
181.It Va m_nextpkt
182.Pq Vt struct mbuf *
183A pointer to the next
184.Vt mbuf chain
185in the queue.
186.It Va m_data
187.Pq Vt caddr_t
188A pointer to data attached to this
189.Vt mbuf .
190.It Va m_len
191.Pq Vt int
192The length of the data.
193.It Va m_type
194.Pq Vt short
195The type of the data.
196.It Va m_flags
197.Pq Vt int
198The
199.Vt mbuf
200flags.
201.El
202.Pp
203The
204.Vt mbuf
205flag bits are defined as follows:
206.Bd -literal
207/* mbuf flags */
208#define	M_EXT		0x0001	/* has associated external storage */
209#define	M_PKTHDR	0x0002	/* start of record */
210#define	M_EOR		0x0004	/* end of record */
211#define	M_RDONLY	0x0008	/* associated data marked read-only */
212#define	M_PROTO1	0x0010	/* protocol-specific */
213#define	M_PROTO2	0x0020 	/* protocol-specific */
214#define	M_PROTO3	0x0040	/* protocol-specific */
215#define	M_PROTO4	0x0080	/* protocol-specific */
216#define	M_PROTO5	0x0100	/* protocol-specific */
217#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
218#define	M_FREELIST	0x8000	/* mbuf is on the free list */
219
220/* mbuf pkthdr flags (also stored in m_flags) */
221#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
222#define	M_MCAST		0x0400	/* send/received as link-level multicast */
223#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
224#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
225#define	M_LASTFRAG	0x2000	/* packet is last fragment */
226.Ed
227.Pp
228The available
229.Vt mbuf
230types are defined as follows:
231.Bd -literal
232/* mbuf types */
233#define	MT_DATA		1	/* dynamic (data) allocation */
234#define	MT_HEADER	MT_DATA	/* packet header */
235#define	MT_SONAME	8	/* socket name */
236#define	MT_CONTROL	14	/* extra-data protocol message */
237#define	MT_OOBDATA	15	/* expedited data */
238.Ed
239.Pp
240If the
241.Dv M_PKTHDR
242flag is set, a
243.Vt struct pkthdr Va m_pkthdr
244is added to the
245.Vt mbuf
246header.
247It contains a pointer to the interface
248the packet has been received from
249.Pq Vt struct ifnet Va *rcvif ,
250and the total packet length
251.Pq Vt int Va len .
252Optionally, it may also contain an attached list of packet tags
253.Pq Vt "struct m_tag" .
254See
255.Xr mbuf_tags 9
256for details.
257Fields used in offloading checksum calculation to the hardware are kept in
258.Va m_pkthdr
259as well.
260See
261.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
262for details.
263.Pp
264If small enough, data is stored in the internal data buffer of an
265.Vt mbuf .
266If the data is sufficiently large, another
267.Vt mbuf
268may be added to the
269.Vt mbuf chain ,
270or external storage may be associated with the
271.Vt mbuf .
272.Dv MHLEN
273bytes of data can fit into an
274.Vt mbuf
275with the
276.Dv M_PKTHDR
277flag set,
278.Dv MLEN
279bytes can otherwise.
280.Pp
281If external storage is being associated with an
282.Vt mbuf ,
283the
284.Va m_ext
285header is added at the cost of losing the internal data buffer.
286It includes a pointer to external storage, the size of the storage,
287a pointer to a function used for freeing the storage,
288a pointer to an optional argument that can be passed to the function,
289and a pointer to a reference counter.
290An
291.Vt mbuf
292using external storage has the
293.Dv M_EXT
294flag set.
295.Pp
296The system supplies a macro for allocating the desired external storage
297buffer,
298.Dv MEXTADD .
299.Pp
300The allocation and management of the reference counter is handled by the
301subsystem.
302.Pp
303The system also supplies a default type of external storage buffer called an
304.Vt mbuf cluster .
305.Vt Mbuf clusters
306can be allocated and configured with the use of the
307.Dv MCLGET
308macro.
309Each
310.Vt mbuf cluster
311is
312.Dv MCLBYTES
313in size, where MCLBYTES is a machine-dependent constant.
314The system defines an advisory macro
315.Dv MINCLSIZE ,
316which is the smallest amount of data to put into an
317.Vt mbuf cluster .
318It is equal to the sum of
319.Dv MLEN
320and
321.Dv MHLEN .
322It is typically preferable to store data into the data region of an
323.Vt mbuf ,
324if size permits, as opposed to allocating a separate
325.Vt mbuf cluster
326to hold the same data.
327.\"
328.Ss Macros and Functions
329There are numerous predefined macros and functions that provide the
330developer with common utilities.
331.\"
332.Bl -ohang -offset indent
333.It Fn mtod mbuf type
334Convert an
335.Fa mbuf
336pointer to a data pointer.
337The macro expands to the data pointer cast to the pointer of the specified
338.Fa type .
339.Sy Note :
340It is advisable to ensure that there is enough contiguous data in
341.Fa mbuf .
342See
343.Fn m_pullup
344for details.
345.It Fn MGET mbuf how type
346Allocate an
347.Vt mbuf
348and initialize it to contain internal data.
349.Fa mbuf
350will point to the allocated
351.Vt mbuf
352on success, or be set to
353.Dv NULL
354on failure.
355The
356.Fa how
357argument is to be set to
358.Dv M_TRYWAIT
359or
360.Dv M_DONTWAIT .
361It specifies whether the caller is willing to block if necessary.
362If
363.Fa how
364is set to
365.Dv M_TRYWAIT ,
366a failed allocation will result in the caller being put
367to sleep for a designated
368.Va kern.ipc.mbuf_wait
369.Xr ( sysctl 8
370tunable)
371number of ticks.
372A number of other functions and macros related to
373.Vt mbufs
374have the same argument because they may
375at some point need to allocate new
376.Vt mbufs .
377.Pp
378Programmers should be careful not to confuse the
379.Vt mbuf
380allocation flag
381.Dv M_DONTWAIT
382with the
383.Xr malloc 9
384allocation flag,
385.Dv M_NOWAIT .
386They are not the same.
387.It Fn MGETHDR mbuf how type
388Allocate an
389.Vt mbuf
390and initialize it to contain a packet header
391and internal data.
392See
393.Fn MGET
394for details.
395.It Fn MCLGET mbuf how
396Allocate and attach an
397.Vt mbuf cluster
398to
399.Fa mbuf .
400If the macro fails, the
401.Dv M_EXT
402flag will not be set in
403.Fa mbuf .
404.It Fn M_ALIGN mbuf len
405Set the pointer
406.Fa mbuf->m_data
407to place an object of the size
408.Fa len
409at the end of the internal data area of
410.Fa mbuf ,
411long word aligned.
412Applicable only if
413.Fa mbuf
414is newly allocated with
415.Fn MGET
416or
417.Fn m_get .
418.It Fn MH_ALIGN mbuf len
419Serves the same purpose as
420.Fn M_ALIGN
421does, but only for
422.Fa mbuf
423newly allocated with
424.Fn MGETHDR
425or
426.Fn m_gethdr ,
427or initialized by
428.Fn m_dup_pkthdr
429or
430.Fn m_move_pkthdr .
431.It Fn m_align mbuf len
432Serves the same purpose as
433.Fn M_ALIGN
434but handles any type of mbuf.
435.It Fn M_LEADINGSPACE mbuf
436Returns the number of bytes available before the beginning
437of data in
438.Fa mbuf .
439.It Fn M_TRAILINGSPACE mbuf
440Returns the number of bytes available after the end of data in
441.Fa mbuf .
442.It Fn M_PREPEND mbuf len how
443This macro operates on an
444.Vt mbuf chain .
445It is an optimized wrapper for
446.Fn m_prepend
447that can make use of possible empty space before data
448(e.g.\& left after trimming of a link-layer header).
449The new
450.Vt mbuf chain
451pointer or
452.Dv NULL
453is in
454.Fa mbuf
455after the call.
456.It Fn M_MOVE_PKTHDR to from
457Using this macro is equivalent to calling
458.Fn m_move_pkthdr to from .
459.It Fn M_WRITABLE mbuf
460This macro will evaluate true if
461.Fa mbuf
462is not marked
463.Dv M_RDONLY
464and if either
465.Fa mbuf
466does not contain external storage or,
467if it does,
468then if the reference count of the storage is not greater than 1.
469The
470.Dv M_RDONLY
471flag can be set in
472.Fa mbuf->m_flags .
473This can be achieved during setup of the external storage,
474by passing the
475.Dv M_RDONLY
476bit as a
477.Fa flags
478argument to the
479.Fn MEXTADD
480macro, or can be directly set in individual
481.Vt mbufs .
482.It Fn MCHTYPE mbuf type
483Change the type of
484.Fa mbuf
485to
486.Fa type .
487This is a relatively expensive operation and should be avoided.
488.El
489.Pp
490The functions are:
491.Bl -ohang -offset indent
492.It Fn m_get how type
493A function version of
494.Fn MGET
495for non-critical paths.
496.It Fn m_getm orig len how type
497Allocate
498.Fa len
499bytes worth of
500.Vt mbufs
501and
502.Vt mbuf clusters
503if necessary and append the resulting allocated
504.Vt mbuf chain
505to the
506.Vt mbuf chain
507.Fa orig ,
508if it is
509.No non- Ns Dv NULL .
510If the allocation fails at any point,
511free whatever was allocated and return
512.Dv NULL .
513If
514.Fa orig
515is
516.No non- Ns Dv NULL ,
517it will not be freed.
518It is possible to use
519.Fn m_getm
520to either append
521.Fa len
522bytes to an existing
523.Vt mbuf
524or
525.Vt mbuf chain
526(for example, one which may be sitting in a pre-allocated ring)
527or to simply perform an all-or-nothing
528.Vt mbuf
529and
530.Vt mbuf cluster
531allocation.
532.It Fn m_gethdr how type
533A function version of
534.Fn MGETHDR
535for non-critical paths.
536.It Fn m_getcl how type flags
537Fetch an
538.Vt mbuf
539with an
540.Vt mbuf cluster
541attached to it.
542If one of the allocations fails, the entire allocation fails.
543This routine is the preferred way of fetching both the
544.Vt mbuf
545and
546.Vt mbuf cluster
547together, as it avoids having to unlock/relock between allocations.
548Returns
549.Dv NULL
550on failure.
551.It Fn m_getclr how type
552Allocate an
553.Vt mbuf
554and zero out the data region.
555.It Fn m_free mbuf
556Frees
557.Vt mbuf .
558Returns
559.Va m_next
560of the freed
561.Vt mbuf .
562.El
563.Pp
564The functions below operate on
565.Vt mbuf chains .
566.Bl -ohang -offset indent
567.It Fn m_freem mbuf
568Free an entire
569.Vt mbuf chain ,
570including any external storage.
571.\"
572.It Fn m_adj mbuf len
573Trim
574.Fa len
575bytes from the head of an
576.Vt mbuf chain
577if
578.Fa len
579is positive, from the tail otherwise.
580.\"
581.It Fn m_append mbuf len cp
582Append
583.Fa len
584bytes of data
585.Fa cp
586to the
587.Vt mbuf chain .
588Extend the mbuf chain if the new data does not fit in
589existing space.
590.\"
591.It Fn m_prepend mbuf len how
592Allocate a new
593.Vt mbuf
594and prepend it to the
595.Vt mbuf chain ,
596handle
597.Dv M_PKTHDR
598properly.
599.Sy Note :
600It does not allocate any
601.Vt mbuf clusters ,
602so
603.Fa len
604must be less than
605.Dv MLEN
606or
607.Dv MHLEN ,
608depending on the
609.Dv M_PKTHDR
610flag setting.
611.\"
612.It Fn m_copyup mbuf len dstoff
613Similar to
614.Fn m_pullup
615but copies
616.Fa len
617bytes of data into a new mbuf at
618.Fa dstoff
619bytes into the mbuf.
620The
621.Fa dstoff
622argument aligns the data and leaves room for a link layer header.
623Returns the new
624.Vt mbuf chain
625on success,
626and frees the
627.Vt mbuf chain
628and returns
629.Dv NULL
630on failure.
631.Sy Note :
632The function does not allocate
633.Vt mbuf clusters ,
634so
635.Fa len + dstoff
636must be less than
637.Dv MHLEN .
638.\"
639.It Fn m_pullup mbuf len
640Arrange that the first
641.Fa len
642bytes of an
643.Vt mbuf chain
644are contiguous and lie in the data area of
645.Fa mbuf ,
646so they are accessible with
647.Fn mtod mbuf type .
648It is important to remember that this may involve
649reallocating some mbufs and moving data so all pointers
650referencing data within the old mbuf chain
651must be recalculated or made invalid.
652Return the new
653.Vt mbuf chain
654on success,
655.Dv NULL
656on failure
657(the
658.Vt mbuf chain
659is freed in this case).
660.Sy Note :
661It does not allocate any
662.Vt mbuf clusters ,
663so
664.Fa len
665must be less than
666.Dv MHLEN .
667.\"
668.It Fn m_pulldown mbuf offset len offsetp
669Arrange that
670.Fa len
671bytes between
672.Fa offset
673and
674.Fa offset + len
675in the
676.Vt mbuf chain
677are contiguous and lie in the data area of
678.Fa mbuf ,
679so they are accessible with
680.Fn mtod mbuf type .
681.Fa len No must be smaller than, or equal to, the size of an
682.Vt mbuf cluster .
683Return a pointer to an intermediate
684.Vt mbuf
685in the chain containing the requested region;
686the offset in the data region of the
687.Vt mbuf chain
688to the data contained in the returned mbuf is stored in
689.Fa *offsetp .
690If
691.Fa offsetp
692is NULL, the region may be accessed using
693.Fn mtod mbuf type .
694If
695.Fa offsetp
696is non-NULL, the region may be accessed using
697.Fn mtod mbuf uint8_t + *offsetp .
698The region of the mbuf chain between its beginning and
699.Fa offset
700is not modified, therefore it is safe to hold pointers to data within
701this region before calling
702.Fn m_pulldown .
703.\"
704.It Fn m_copym mbuf offset len how
705Make a copy of an
706.Vt mbuf chain
707starting
708.Fa offset
709bytes from the beginning, continuing for
710.Fa len
711bytes.
712If
713.Fa len
714is
715.Dv M_COPYALL ,
716copy to the end of the
717.Vt mbuf chain .
718.Sy Note :
719The copy is read-only, because the
720.Vt mbuf clusters
721are not copied, only their reference counts are incremented.
722.\"
723.It Fn m_copypacket mbuf how
724Copy an entire packet including header, which must be present.
725This is an optimized version of the common case
726.Fn m_copym mbuf 0 M_COPYALL how .
727.Sy Note :
728the copy is read-only, because the
729.Vt mbuf clusters
730are not copied, only their reference counts are incremented.
731.\"
732.It Fn m_dup mbuf how
733Copy a packet header
734.Vt mbuf chain
735into a completely new
736.Vt mbuf chain ,
737including copying any
738.Vt mbuf clusters .
739Use this instead of
740.Fn m_copypacket
741when you need a writable copy of an
742.Vt mbuf chain .
743.\"
744.It Fn m_copydata mbuf offset len buf
745Copy data from an
746.Vt mbuf chain
747starting
748.Fa offset
749bytes from the beginning, continuing for
750.Fa len
751bytes, into the indicated buffer
752.Fa buf .
753.\"
754.It Fn m_copyback mbuf offset len buf
755Copy
756.Fa len
757bytes from the buffer
758.Fa buf
759back into the indicated
760.Vt mbuf chain ,
761starting at
762.Fa offset
763bytes from the beginning of the
764.Vt mbuf chain ,
765extending the
766.Vt mbuf chain
767if necessary.
768.Sy Note :
769It does not allocate any
770.Vt mbuf clusters ,
771just adds
772.Vt mbufs
773to the
774.Vt mbuf chain .
775It is safe to set
776.Fa offset
777beyond the current
778.Vt mbuf chain
779end: zeroed
780.Vt mbufs
781will be allocated to fill the space.
782.\"
783.It Fn m_length mbuf last
784Return the length of the
785.Vt mbuf chain ,
786and optionally a pointer to the last
787.Vt mbuf .
788.\"
789.It Fn m_dup_pkthdr to from how
790Upon the function's completion, the
791.Vt mbuf
792.Fa to
793will contain an identical copy of
794.Fa from->m_pkthdr
795and the per-packet attributes found in the
796.Vt mbuf chain
797.Fa from .
798The
799.Vt mbuf
800.Fa from
801must have the flag
802.Dv M_PKTHDR
803initially set, and
804.Fa to
805must be empty on entry.
806.\"
807.It Fn m_move_pkthdr to from
808Move
809.Va m_pkthdr
810and the per-packet attributes from the
811.Vt mbuf chain
812.Fa from
813to the
814.Vt mbuf
815.Fa to .
816The
817.Vt mbuf
818.Fa from
819must have the flag
820.Dv M_PKTHDR
821initially set, and
822.Fa to
823must be empty on entry.
824Upon the function's completion,
825.Fa from
826will have the flag
827.Dv M_PKTHDR
828and the per-packet attributes cleared.
829.\"
830.It Fn m_fixhdr mbuf
831Set the packet-header length to the length of the
832.Vt mbuf chain .
833.\"
834.It Fn m_devget buf len offset ifp copy
835Copy data from device-local memory pointed to by
836.Fa buf
837to an
838.Vt mbuf chain .
839The copy is done using a specified copy routine
840.Fa copy ,
841or
842.Fn bcopy
843if
844.Fa copy
845is
846.Dv NULL .
847.\"
848.It Fn m_cat m n
849Concatenate
850.Fa n
851to
852.Fa m .
853Both
854.Vt mbuf chains
855must be of the same type.
856.Fa n
857is still valid after the function returns.
858.Sy Note :
859It does not handle
860.Dv M_PKTHDR
861and friends.
862.\"
863.It Fn m_split mbuf len how
864Partition an
865.Vt mbuf chain
866in two pieces, returning the tail:
867all but the first
868.Fa len
869bytes.
870In case of failure, it returns
871.Dv NULL
872and attempts to restore the
873.Vt mbuf chain
874to its original state.
875.\"
876.It Fn m_apply mbuf off len f arg
877Apply a function to an
878.Vt mbuf chain ,
879at offset
880.Fa off ,
881for length
882.Fa len
883bytes.
884Typically used to avoid calls to
885.Fn m_pullup
886which would otherwise be unnecessary or undesirable.
887.Fa arg
888is a convenience argument which is passed to the callback function
889.Fa f .
890.Pp
891Each time
892.Fn f
893is called, it will be passed
894.Fa arg ,
895a pointer to the
896.Fa data
897in the current mbuf, and the length
898.Fa len
899of the data in this mbuf to which the function should be applied.
900.Pp
901The function should return zero to indicate success;
902otherwise, if an error is indicated, then
903.Fn m_apply
904will return the error and stop iterating through the
905.Vt mbuf chain .
906.\"
907.It Fn m_getptr mbuf loc off
908Return a pointer to the mbuf containing the data located at
909.Fa loc
910bytes from the beginning of the
911.Vt mbuf chain .
912The corresponding offset into the mbuf will be stored in
913.Fa *off .
914.It Fn m_defrag m0 how
915Defragment an mbuf chain, returning the shortest possible
916chain of mbufs and clusters.
917If allocation fails and this cannot be completed,
918.Dv NULL
919will be returned and the original chain will be unchanged.
920Upon success, the original chain will be freed and the new
921chain will be returned.
922.Fa how
923should be either
924.Dv M_TRYWAIT
925or
926.Dv M_DONTWAIT ,
927depending on the caller's preference.
928.Pp
929This function is especially useful in network drivers, where
930certain long mbuf chains must be shortened before being added
931to TX descriptor lists.
932.It Fn m_unshare m0 how
933Create a version of the specified mbuf chain whose
934contents can be safely modified without affecting other users.
935If allocation fails and this operation cannot be completed,
936.Dv NULL
937will be returned.
938The original mbuf chain is always reclaimed and the reference
939count of any shared mbuf clusters is decremented.
940.Fa how
941should be either
942.Dv M_TRYWAIT
943or
944.Dv M_DONTWAIT ,
945depending on the caller's preference.
946As a side-effect of this process the returned
947mbuf chain may be compacted.
948.Pp
949This function is especially useful in the transmit path of
950network code, when data must be encrypted or otherwise
951altered prior to transmission.
952.El
953.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
954This section currently applies to TCP/IP only.
955In order to save the host CPU resources, computing checksums is
956offloaded to the network interface hardware if possible.
957The
958.Va m_pkthdr
959member of the leading
960.Vt mbuf
961of a packet contains two fields used for that purpose,
962.Vt int Va csum_flags
963and
964.Vt int Va csum_data .
965The meaning of those fields depends on the direction a packet flows in,
966and on whether the packet is fragmented.
967Henceforth,
968.Va csum_flags
969or
970.Va csum_data
971of a packet
972will denote the corresponding field of the
973.Va m_pkthdr
974member of the leading
975.Vt mbuf
976in the
977.Vt mbuf chain
978containing the packet.
979.Pp
980On output, checksum offloading is attempted after the outgoing
981interface has been determined for a packet.
982The interface-specific field
983.Va ifnet.if_data.ifi_hwassist
984(see
985.Xr ifnet 9 )
986is consulted for the capabilities of the interface to assist in
987computing checksums.
988The
989.Va csum_flags
990field of the packet header is set to indicate which actions the interface
991is supposed to perform on it.
992The actions unsupported by the network interface are done in the
993software prior to passing the packet down to the interface driver;
994such actions will never be requested through
995.Va csum_flags .
996.Pp
997The flags demanding a particular action from an interface are as follows:
998.Bl -tag -width ".Dv CSUM_TCP" -offset indent
999.It Dv CSUM_IP
1000The IP header checksum is to be computed and stored in the
1001corresponding field of the packet.
1002The hardware is expected to know the format of an IP header
1003to determine the offset of the IP checksum field.
1004.It Dv CSUM_TCP
1005The TCP checksum is to be computed.
1006(See below.)
1007.It Dv CSUM_UDP
1008The UDP checksum is to be computed.
1009(See below.)
1010.El
1011.Pp
1012Should a TCP or UDP checksum be offloaded to the hardware,
1013the field
1014.Va csum_data
1015will contain the byte offset of the checksum field relative to the
1016end of the IP header.
1017In this case, the checksum field will be initially
1018set by the TCP/IP module to the checksum of the pseudo header
1019defined by the TCP and UDP specifications.
1020.Pp
1021For outbound packets which have been fragmented
1022by the host CPU, the following will also be true,
1023regardless of the checksum flag settings:
1024.Bl -bullet -offset indent
1025.It
1026all fragments will have the flag
1027.Dv M_FRAG
1028set in their
1029.Va m_flags
1030field;
1031.It
1032the first and the last fragments in the chain will have
1033.Dv M_FIRSTFRAG
1034or
1035.Dv M_LASTFRAG
1036set in their
1037.Va m_flags ,
1038correspondingly;
1039.It
1040the first fragment in the chain will have the total number
1041of fragments contained in its
1042.Va csum_data
1043field.
1044.El
1045.Pp
1046The last rule for fragmented packets takes precedence over the one
1047for a TCP or UDP checksum.
1048Nevertheless, offloading a TCP or UDP checksum is possible for a
1049fragmented packet if the flag
1050.Dv CSUM_IP_FRAGS
1051is set in the field
1052.Va ifnet.if_data.ifi_hwassist
1053associated with the network interface.
1054However, in this case the interface is expected to figure out
1055the location of the checksum field within the sequence of fragments
1056by itself because
1057.Va csum_data
1058contains a fragment count instead of a checksum offset value.
1059.Pp
1060On input, an interface indicates the actions it has performed
1061on a packet by setting one or more of the following flags in
1062.Va csum_flags
1063associated with the packet:
1064.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1065.It Dv CSUM_IP_CHECKED
1066The IP header checksum has been computed.
1067.It Dv CSUM_IP_VALID
1068The IP header has a valid checksum.
1069This flag can appear only in combination with
1070.Dv CSUM_IP_CHECKED .
1071.It Dv CSUM_DATA_VALID
1072The checksum of the data portion of the IP packet has been computed
1073and stored in the field
1074.Va csum_data
1075in network byte order.
1076.It Dv CSUM_PSEUDO_HDR
1077Can be set only along with
1078.Dv CSUM_DATA_VALID
1079to indicate that the IP data checksum found in
1080.Va csum_data
1081allows for the pseudo header defined by the TCP and UDP specifications.
1082Otherwise the checksum of the pseudo header must be calculated by
1083the host CPU and added to
1084.Va csum_data
1085to obtain the final checksum to be used for TCP or UDP validation purposes.
1086.El
1087.Pp
1088If a particular network interface just indicates success or
1089failure of TCP or UDP checksum validation without returning
1090the exact value of the checksum to the host CPU, its driver can mark
1091.Dv CSUM_DATA_VALID
1092and
1093.Dv CSUM_PSEUDO_HDR
1094in
1095.Va csum_flags ,
1096and set
1097.Va csum_data
1098to
1099.Li 0xFFFF
1100hexadecimal to indicate a valid checksum.
1101It is a peculiarity of the algorithm used that the Internet checksum
1102calculated over any valid packet will be
1103.Li 0xFFFF
1104as long as the original checksum field is included.
1105.Pp
1106For inbound packets which are IP fragments, all
1107.Va csum_data
1108fields will be summed during reassembly to obtain the final checksum
1109value passed to an upper layer in the
1110.Va csum_data
1111field of the reassembled packet.
1112The
1113.Va csum_flags
1114fields of all fragments will be consolidated using logical AND
1115to obtain the final value for
1116.Va csum_flags .
1117Thus, in order to successfully
1118offload checksum computation for fragmented data,
1119all fragments should have the same value of
1120.Va csum_flags .
1121.Sh STRESS TESTING
1122When running a kernel compiled with the option
1123.Dv MBUF_STRESS_TEST ,
1124the following
1125.Xr sysctl 8 Ns
1126-controlled options may be used to create
1127various failure/extreme cases for testing of network drivers
1128and other parts of the kernel that rely on
1129.Vt mbufs .
1130.Bl -tag -width indent
1131.It Va net.inet.ip.mbuf_frag_size
1132Causes
1133.Fn ip_output
1134to fragment outgoing
1135.Vt mbuf chains
1136into fragments of the specified size.
1137Setting this variable to 1 is an excellent way to
1138test the long
1139.Vt mbuf chain
1140handling ability of network drivers.
1141.It Va kern.ipc.m_defragrandomfailures
1142Causes the function
1143.Fn m_defrag
1144to randomly fail, returning
1145.Dv NULL .
1146Any piece of code which uses
1147.Fn m_defrag
1148should be tested with this feature.
1149.El
1150.Sh RETURN VALUES
1151See above.
1152.Sh SEE ALSO
1153.Xr ifnet 9 ,
1154.Xr mbuf_tags 9
1155.Sh HISTORY
1156.\" Please correct me if I'm wrong
1157.Vt Mbufs
1158appeared in an early version of
1159.Bx .
1160Besides being used for network packets, they were used
1161to store various dynamic structures, such as routing table
1162entries, interface addresses, protocol control blocks, etc.
1163In more recent
1164.Fx
1165use of
1166.Vt mbufs
1167is almost entirely limited to packet storage, with
1168.Xr uma 9
1169zones being used directly to store other network-related memory.
1170.Pp
1171Historically, the
1172.Vt mbuf
1173allocator has been a special-purpose memory allocator able to run in
1174interrupt contexts and allocating from a special kernel address space map.
1175As of
1176.Fx 5.3 ,
1177the
1178.Vt mbuf
1179allocator is a wrapper around
1180.Xr uma 9 ,
1181allowing caching of
1182.Vt mbufs ,
1183clusters, and
1184.Vt mbuf
1185+ cluster pairs in per-CPU caches, as well as bringing other benefits of
1186slab allocation.
1187.Sh AUTHORS
1188The original
1189.Nm
1190manual page was written by Yar Tikhiy.
1191The
1192.Xr uma 9
1193.Vt mbuf
1194allocator was written by Bosko Milekic.
1195