xref: /freebsd/share/man/man9/mbuf.9 (revision 6d732c66bca5da4d261577aad2c8ea84519b0bea)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd December 20, 2013
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_arg1, void *opt_arg2)"
49.Fa "void *opt_arg1"
50.Fa "void *opt_arg2"
51.Fa "short flags"
52.Fa "int type"
53.Fc
54.Fn MEXTFREE "struct mbuf *mbuf"
55.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
56.\"
57.Ss Mbuf utility macros
58.Fn mtod "struct mbuf *mbuf" "type"
59.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
60.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
61.Ft int
62.Fn M_LEADINGSPACE "struct mbuf *mbuf"
63.Ft int
64.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
65.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
66.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
67.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
68.Ft int
69.Fn M_WRITABLE "struct mbuf *mbuf"
70.\"
71.Ss Mbuf allocation functions
72.Ft struct mbuf *
73.Fn m_get "int how" "int type"
74.Ft struct mbuf *
75.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
76.Ft struct mbuf *
77.Fn m_getjcl "int how" "short type" "int flags" "int size"
78.Ft struct mbuf *
79.Fn m_getcl "int how" "short type" "int flags"
80.Ft struct mbuf *
81.Fn m_getclr "int how" "int type"
82.Ft struct mbuf *
83.Fn m_gethdr "int how" "int type"
84.Ft struct mbuf *
85.Fn m_free "struct mbuf *mbuf"
86.Ft void
87.Fn m_freem "struct mbuf *mbuf"
88.\"
89.Ss Mbuf utility functions
90.Ft void
91.Fn m_adj "struct mbuf *mbuf" "int len"
92.Ft void
93.Fn m_align "struct mbuf *mbuf" "int len"
94.Ft int
95.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
96.Ft struct mbuf *
97.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
98.Ft struct mbuf *
99.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
100.Ft struct mbuf *
101.Fn m_pullup "struct mbuf *mbuf" "int len"
102.Ft struct mbuf *
103.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp"
104.Ft struct mbuf *
105.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
106.Ft struct mbuf *
107.Fn m_copypacket "struct mbuf *mbuf" "int how"
108.Ft struct mbuf *
109.Fn m_dup "struct mbuf *mbuf" "int how"
110.Ft void
111.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
112.Ft void
113.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
114.Ft struct mbuf *
115.Fo m_devget
116.Fa "char *buf"
117.Fa "int len"
118.Fa "int offset"
119.Fa "struct ifnet *ifp"
120.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
121.Fc
122.Ft void
123.Fn m_cat "struct mbuf *m" "struct mbuf *n"
124.Ft u_int
125.Fn m_fixhdr "struct mbuf *mbuf"
126.Ft int
127.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
128.Ft void
129.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
130.Ft u_int
131.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
132.Ft struct mbuf *
133.Fn m_split "struct mbuf *mbuf" "int len" "int how"
134.Ft int
135.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
136.Ft struct mbuf *
137.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
138.Ft struct mbuf *
139.Fn m_defrag "struct mbuf *m0" "int how"
140.Ft struct mbuf *
141.Fn m_unshare "struct mbuf *m0" "int how"
142.\"
143.Sh DESCRIPTION
144An
145.Vt mbuf
146is a basic unit of memory management in the kernel IPC subsystem.
147Network packets and socket buffers are stored in
148.Vt mbufs .
149A network packet may span multiple
150.Vt mbufs
151arranged into a
152.Vt mbuf chain
153(linked list),
154which allows adding or trimming
155network headers with little overhead.
156.Pp
157While a developer should not bother with
158.Vt mbuf
159internals without serious
160reason in order to avoid incompatibilities with future changes, it
161is useful to understand the general structure of an
162.Vt mbuf .
163.Pp
164An
165.Vt mbuf
166consists of a variable-sized header and a small internal
167buffer for data.
168The total size of an
169.Vt mbuf ,
170.Dv MSIZE ,
171is a constant defined in
172.In sys/param.h .
173The
174.Vt mbuf
175header includes:
176.Bl -tag -width "m_nextpkt" -offset indent
177.It Va m_next
178.Pq Vt struct mbuf *
179A pointer to the next
180.Vt mbuf
181in the
182.Vt mbuf chain .
183.It Va m_nextpkt
184.Pq Vt struct mbuf *
185A pointer to the next
186.Vt mbuf chain
187in the queue.
188.It Va m_data
189.Pq Vt caddr_t
190A pointer to data attached to this
191.Vt mbuf .
192.It Va m_len
193.Pq Vt int
194The length of the data.
195.It Va m_type
196.Pq Vt short
197The type of the data.
198.It Va m_flags
199.Pq Vt int
200The
201.Vt mbuf
202flags.
203.El
204.Pp
205The
206.Vt mbuf
207flag bits are defined as follows:
208.Bd -literal
209/* mbuf flags */
210#define	M_EXT		0x0001	/* has associated external storage */
211#define	M_PKTHDR	0x0002	/* start of record */
212#define	M_EOR		0x0004	/* end of record */
213#define	M_RDONLY	0x0008	/* associated data marked read-only */
214#define	M_PROTO1	0x0010	/* protocol-specific */
215#define	M_PROTO2	0x0020	/* protocol-specific */
216#define	M_PROTO3	0x0040	/* protocol-specific */
217#define	M_PROTO4	0x0080	/* protocol-specific */
218#define	M_PROTO5	0x0100	/* protocol-specific */
219#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
220#define	M_FREELIST	0x8000	/* mbuf is on the free list */
221
222/* mbuf pkthdr flags (also stored in m_flags) */
223#define	M_BCAST		0x0200	/* sent/received as link-level broadcast */
224#define	M_MCAST		0x0400	/* sent/received as link-level multicast */
225#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
226#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
227#define	M_LASTFRAG	0x2000	/* packet is last fragment */
228.Ed
229.Pp
230The available
231.Vt mbuf
232types are defined as follows:
233.Bd -literal
234/* mbuf types */
235#define	MT_DATA		1	/* dynamic (data) allocation */
236#define	MT_HEADER	MT_DATA	/* packet header */
237#define	MT_SONAME	8	/* socket name */
238#define	MT_CONTROL	14	/* extra-data protocol message */
239#define	MT_OOBDATA	15	/* expedited data */
240.Ed
241.Pp
242The available external buffer types are defined as follows:
243.Bd -literal
244/* external buffer types */
245#define EXT_CLUSTER	1	/* mbuf cluster */
246#define EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
247#define EXT_JUMBOP	3	/* jumbo cluster 4096 bytes */
248#define EXT_JUMBO9	4	/* jumbo cluster 9216 bytes */
249#define EXT_JUMBO16	5	/* jumbo cluster 16384 bytes */
250#define EXT_PACKET	6	/* mbuf+cluster from packet zone */
251#define EXT_MBUF	7	/* external mbuf reference (M_IOVEC) */
252#define EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
253#define EXT_MOD_TYPE	200	/* custom module's ext_buf type */
254#define EXT_DISPOSABLE	300	/* can throw this buffer away w/page flipping */
255#define EXT_EXTREF	400	/* has externally maintained ref_cnt ptr */
256.Ed
257.Pp
258If the
259.Dv M_PKTHDR
260flag is set, a
261.Vt struct pkthdr Va m_pkthdr
262is added to the
263.Vt mbuf
264header.
265It contains a pointer to the interface
266the packet has been received from
267.Pq Vt struct ifnet Va *rcvif ,
268and the total packet length
269.Pq Vt int Va len .
270Optionally, it may also contain an attached list of packet tags
271.Pq Vt "struct m_tag" .
272See
273.Xr mbuf_tags 9
274for details.
275Fields used in offloading checksum calculation to the hardware are kept in
276.Va m_pkthdr
277as well.
278See
279.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
280for details.
281.Pp
282If small enough, data is stored in the internal data buffer of an
283.Vt mbuf .
284If the data is sufficiently large, another
285.Vt mbuf
286may be added to the
287.Vt mbuf chain ,
288or external storage may be associated with the
289.Vt mbuf .
290.Dv MHLEN
291bytes of data can fit into an
292.Vt mbuf
293with the
294.Dv M_PKTHDR
295flag set,
296.Dv MLEN
297bytes can otherwise.
298.Pp
299If external storage is being associated with an
300.Vt mbuf ,
301the
302.Va m_ext
303header is added at the cost of losing the internal data buffer.
304It includes a pointer to external storage, the size of the storage,
305a pointer to a function used for freeing the storage,
306a pointer to an optional argument that can be passed to the function,
307and a pointer to a reference counter.
308An
309.Vt mbuf
310using external storage has the
311.Dv M_EXT
312flag set.
313.Pp
314The system supplies a macro for allocating the desired external storage
315buffer,
316.Dv MEXTADD .
317.Pp
318The allocation and management of the reference counter is handled by the
319subsystem.
320.Pp
321The system also supplies a default type of external storage buffer called an
322.Vt mbuf cluster .
323.Vt Mbuf clusters
324can be allocated and configured with the use of the
325.Dv MCLGET
326macro.
327Each
328.Vt mbuf cluster
329is
330.Dv MCLBYTES
331in size, where MCLBYTES is a machine-dependent constant.
332The system defines an advisory macro
333.Dv MINCLSIZE ,
334which is the smallest amount of data to put into an
335.Vt mbuf cluster .
336It is equal to
337.Dv MHLEN
338plus one.
339It is typically preferable to store data into the data region of an
340.Vt mbuf ,
341if size permits, as opposed to allocating a separate
342.Vt mbuf cluster
343to hold the same data.
344.\"
345.Ss Macros and Functions
346There are numerous predefined macros and functions that provide the
347developer with common utilities.
348.\"
349.Bl -ohang -offset indent
350.It Fn mtod mbuf type
351Convert an
352.Fa mbuf
353pointer to a data pointer.
354The macro expands to the data pointer cast to the pointer of the specified
355.Fa type .
356.Sy Note :
357It is advisable to ensure that there is enough contiguous data in
358.Fa mbuf .
359See
360.Fn m_pullup
361for details.
362.It Fn MGET mbuf how type
363Allocate an
364.Vt mbuf
365and initialize it to contain internal data.
366.Fa mbuf
367will point to the allocated
368.Vt mbuf
369on success, or be set to
370.Dv NULL
371on failure.
372The
373.Fa how
374argument is to be set to
375.Dv M_WAITOK
376or
377.Dv M_NOWAIT .
378It specifies whether the caller is willing to block if necessary.
379A number of other functions and macros related to
380.Vt mbufs
381have the same argument because they may
382at some point need to allocate new
383.Vt mbufs .
384.Pp
385The historical
386.Vt mbuf
387allocator (see
388.Sx HISTORY
389section) used allocation flags
390.Dv M_WAIT
391and
392.Dv M_DONTWAIT .
393These constants are kept for compatibility
394and their use in new code is discouraged.
395.It Fn MGETHDR mbuf how type
396Allocate an
397.Vt mbuf
398and initialize it to contain a packet header
399and internal data.
400See
401.Fn MGET
402for details.
403.It Fn MEXTADD mbuf buf size free opt_arg1 opt_arg2 flags type
404Associate externally managed data with
405.Fa mbuf .
406Any internal data contained in the mbuf will be discarded, and the
407.Dv M_EXT
408flag will be set.
409The
410.Fa buf
411and
412.Fa size
413arguments are the address and length, respectively, of the data.
414The
415.Fa free
416argument points to a function which will be called to free the data
417when the mbuf is freed; it is only used if
418.Fa type
419is
420.Dv EXT_EXTREF .
421The
422.Fa opt_arg1
423and
424.Fa opt_arg2
425arguments will be passed unmodified to
426.Fa free .
427The
428.Fa flags
429argument specifies additional
430.Vt mbuf
431flags; it is not necessary to specify
432.Dv M_EXT .
433Finally, the
434.Fa type
435argument specifies the type of external data, which controls how it
436will be disposed of when the
437.Vt mbuf
438is freed.
439In most cases, the correct value is
440.Dv EXT_EXTREF .
441.It Fn MCLGET mbuf how
442Allocate and attach an
443.Vt mbuf cluster
444to
445.Fa mbuf .
446If the macro fails, the
447.Dv M_EXT
448flag will not be set in
449.Fa mbuf .
450.It Fn M_ALIGN mbuf len
451Set the pointer
452.Fa mbuf->m_data
453to place an object of the size
454.Fa len
455at the end of the internal data area of
456.Fa mbuf ,
457long word aligned.
458Applicable only if
459.Fa mbuf
460is newly allocated with
461.Fn MGET
462or
463.Fn m_get .
464.It Fn MH_ALIGN mbuf len
465Serves the same purpose as
466.Fn M_ALIGN
467does, but only for
468.Fa mbuf
469newly allocated with
470.Fn MGETHDR
471or
472.Fn m_gethdr ,
473or initialized by
474.Fn m_dup_pkthdr
475or
476.Fn m_move_pkthdr .
477.It Fn m_align mbuf len
478Serves the same purpose as
479.Fn M_ALIGN
480but handles any type of mbuf.
481.It Fn M_LEADINGSPACE mbuf
482Returns the number of bytes available before the beginning
483of data in
484.Fa mbuf .
485.It Fn M_TRAILINGSPACE mbuf
486Returns the number of bytes available after the end of data in
487.Fa mbuf .
488.It Fn M_PREPEND mbuf len how
489This macro operates on an
490.Vt mbuf chain .
491It is an optimized wrapper for
492.Fn m_prepend
493that can make use of possible empty space before data
494(e.g.\& left after trimming of a link-layer header).
495The new
496.Vt mbuf chain
497pointer or
498.Dv NULL
499is in
500.Fa mbuf
501after the call.
502.It Fn M_MOVE_PKTHDR to from
503Using this macro is equivalent to calling
504.Fn m_move_pkthdr to from .
505.It Fn M_WRITABLE mbuf
506This macro will evaluate true if
507.Fa mbuf
508is not marked
509.Dv M_RDONLY
510and if either
511.Fa mbuf
512does not contain external storage or,
513if it does,
514then if the reference count of the storage is not greater than 1.
515The
516.Dv M_RDONLY
517flag can be set in
518.Fa mbuf->m_flags .
519This can be achieved during setup of the external storage,
520by passing the
521.Dv M_RDONLY
522bit as a
523.Fa flags
524argument to the
525.Fn MEXTADD
526macro, or can be directly set in individual
527.Vt mbufs .
528.It Fn MCHTYPE mbuf type
529Change the type of
530.Fa mbuf
531to
532.Fa type .
533This is a relatively expensive operation and should be avoided.
534.El
535.Pp
536The functions are:
537.Bl -ohang -offset indent
538.It Fn m_get how type
539A function version of
540.Fn MGET
541for non-critical paths.
542.It Fn m_getm orig len how type
543Allocate
544.Fa len
545bytes worth of
546.Vt mbufs
547and
548.Vt mbuf clusters
549if necessary and append the resulting allocated
550.Vt mbuf chain
551to the
552.Vt mbuf chain
553.Fa orig ,
554if it is
555.No non- Ns Dv NULL .
556If the allocation fails at any point,
557free whatever was allocated and return
558.Dv NULL .
559If
560.Fa orig
561is
562.No non- Ns Dv NULL ,
563it will not be freed.
564It is possible to use
565.Fn m_getm
566to either append
567.Fa len
568bytes to an existing
569.Vt mbuf
570or
571.Vt mbuf chain
572(for example, one which may be sitting in a pre-allocated ring)
573or to simply perform an all-or-nothing
574.Vt mbuf
575and
576.Vt mbuf cluster
577allocation.
578.It Fn m_gethdr how type
579A function version of
580.Fn MGETHDR
581for non-critical paths.
582.It Fn m_getcl how type flags
583Fetch an
584.Vt mbuf
585with a
586.Vt mbuf cluster
587attached to it.
588If one of the allocations fails, the entire allocation fails.
589This routine is the preferred way of fetching both the
590.Vt mbuf
591and
592.Vt mbuf cluster
593together, as it avoids having to unlock/relock between allocations.
594Returns
595.Dv NULL
596on failure.
597.It Fn m_getjcl how type flags size
598This is like
599.Fn m_getcl
600but the size of the cluster allocated will be large enough for
601.Fa size
602bytes.
603.It Fn m_getclr how type
604Allocate an
605.Vt mbuf
606and zero out the data region.
607.It Fn m_free mbuf
608Frees
609.Fa mbuf .
610Returns
611.Va m_next
612of the freed
613.Vt mbuf .
614.El
615.Pp
616The functions below operate on
617.Vt mbuf chains .
618.Bl -ohang -offset indent
619.It Fn m_freem mbuf
620Free an entire
621.Vt mbuf chain ,
622including any external storage.
623.\"
624.It Fn m_adj mbuf len
625Trim
626.Fa len
627bytes from the head of an
628.Vt mbuf chain
629if
630.Fa len
631is positive, from the tail otherwise.
632.\"
633.It Fn m_append mbuf len cp
634Append
635.Fa len
636bytes of data
637.Fa cp
638to the
639.Vt mbuf chain .
640Extend the mbuf chain if the new data does not fit in
641existing space.
642.\"
643.It Fn m_prepend mbuf len how
644Allocate a new
645.Vt mbuf
646and prepend it to the
647.Vt mbuf chain ,
648handle
649.Dv M_PKTHDR
650properly.
651.Sy Note :
652It does not allocate any
653.Vt mbuf clusters ,
654so
655.Fa len
656must be less than
657.Dv MLEN
658or
659.Dv MHLEN ,
660depending on the
661.Dv M_PKTHDR
662flag setting.
663.\"
664.It Fn m_copyup mbuf len dstoff
665Similar to
666.Fn m_pullup
667but copies
668.Fa len
669bytes of data into a new mbuf at
670.Fa dstoff
671bytes into the mbuf.
672The
673.Fa dstoff
674argument aligns the data and leaves room for a link layer header.
675Returns the new
676.Vt mbuf chain
677on success,
678and frees the
679.Vt mbuf chain
680and returns
681.Dv NULL
682on failure.
683.Sy Note :
684The function does not allocate
685.Vt mbuf clusters ,
686so
687.Fa len + dstoff
688must be less than
689.Dv MHLEN .
690.\"
691.It Fn m_pullup mbuf len
692Arrange that the first
693.Fa len
694bytes of an
695.Vt mbuf chain
696are contiguous and lie in the data area of
697.Fa mbuf ,
698so they are accessible with
699.Fn mtod mbuf type .
700It is important to remember that this may involve
701reallocating some mbufs and moving data so all pointers
702referencing data within the old mbuf chain
703must be recalculated or made invalid.
704Return the new
705.Vt mbuf chain
706on success,
707.Dv NULL
708on failure
709(the
710.Vt mbuf chain
711is freed in this case).
712.Sy Note :
713It does not allocate any
714.Vt mbuf clusters ,
715so
716.Fa len
717must be less than or equal to
718.Dv MHLEN .
719.\"
720.It Fn m_pulldown mbuf offset len offsetp
721Arrange that
722.Fa len
723bytes between
724.Fa offset
725and
726.Fa offset + len
727in the
728.Vt mbuf chain
729are contiguous and lie in the data area of
730.Fa mbuf ,
731so they are accessible with
732.Fn mtod mbuf type .
733.Fa len
734must be smaller than, or equal to, the size of an
735.Vt mbuf cluster .
736Return a pointer to an intermediate
737.Vt mbuf
738in the chain containing the requested region;
739the offset in the data region of the
740.Vt mbuf chain
741to the data contained in the returned mbuf is stored in
742.Fa *offsetp .
743If
744.Fa offsetp
745is NULL, the region may be accessed using
746.Fn mtod mbuf type .
747If
748.Fa offsetp
749is non-NULL, the region may be accessed using
750.Fn mtod mbuf uint8_t + *offsetp .
751The region of the mbuf chain between its beginning and
752.Fa offset
753is not modified, therefore it is safe to hold pointers to data within
754this region before calling
755.Fn m_pulldown .
756.\"
757.It Fn m_copym mbuf offset len how
758Make a copy of an
759.Vt mbuf chain
760starting
761.Fa offset
762bytes from the beginning, continuing for
763.Fa len
764bytes.
765If
766.Fa len
767is
768.Dv M_COPYALL ,
769copy to the end of the
770.Vt mbuf chain .
771.Sy Note :
772The copy is read-only, because the
773.Vt mbuf clusters
774are not copied, only their reference counts are incremented.
775.\"
776.It Fn m_copypacket mbuf how
777Copy an entire packet including header, which must be present.
778This is an optimized version of the common case
779.Fn m_copym mbuf 0 M_COPYALL how .
780.Sy Note :
781The copy is read-only, because the
782.Vt mbuf clusters
783are not copied, only their reference counts are incremented.
784.\"
785.It Fn m_dup mbuf how
786Copy a packet header
787.Vt mbuf chain
788into a completely new
789.Vt mbuf chain ,
790including copying any
791.Vt mbuf clusters .
792Use this instead of
793.Fn m_copypacket
794when you need a writable copy of an
795.Vt mbuf chain .
796.\"
797.It Fn m_copydata mbuf offset len buf
798Copy data from an
799.Vt mbuf chain
800starting
801.Fa offset
802bytes from the beginning, continuing for
803.Fa len
804bytes, into the indicated buffer
805.Fa buf .
806.\"
807.It Fn m_copyback mbuf offset len buf
808Copy
809.Fa len
810bytes from the buffer
811.Fa buf
812back into the indicated
813.Vt mbuf chain ,
814starting at
815.Fa offset
816bytes from the beginning of the
817.Vt mbuf chain ,
818extending the
819.Vt mbuf chain
820if necessary.
821.Sy Note :
822It does not allocate any
823.Vt mbuf clusters ,
824just adds
825.Vt mbufs
826to the
827.Vt mbuf chain .
828It is safe to set
829.Fa offset
830beyond the current
831.Vt mbuf chain
832end: zeroed
833.Vt mbufs
834will be allocated to fill the space.
835.\"
836.It Fn m_length mbuf last
837Return the length of the
838.Vt mbuf chain ,
839and optionally a pointer to the last
840.Vt mbuf .
841.\"
842.It Fn m_dup_pkthdr to from how
843Upon the function's completion, the
844.Vt mbuf
845.Fa to
846will contain an identical copy of
847.Fa from->m_pkthdr
848and the per-packet attributes found in the
849.Vt mbuf chain
850.Fa from .
851The
852.Vt mbuf
853.Fa from
854must have the flag
855.Dv M_PKTHDR
856initially set, and
857.Fa to
858must be empty on entry.
859.\"
860.It Fn m_move_pkthdr to from
861Move
862.Va m_pkthdr
863and the per-packet attributes from the
864.Vt mbuf chain
865.Fa from
866to the
867.Vt mbuf
868.Fa to .
869The
870.Vt mbuf
871.Fa from
872must have the flag
873.Dv M_PKTHDR
874initially set, and
875.Fa to
876must be empty on entry.
877Upon the function's completion,
878.Fa from
879will have the flag
880.Dv M_PKTHDR
881and the per-packet attributes cleared.
882.\"
883.It Fn m_fixhdr mbuf
884Set the packet-header length to the length of the
885.Vt mbuf chain .
886.\"
887.It Fn m_devget buf len offset ifp copy
888Copy data from device local memory pointed to by
889.Fa buf
890to an
891.Vt mbuf chain .
892The copy is done using a specified copy routine
893.Fa copy ,
894or
895.Fn bcopy
896if
897.Fa copy
898is
899.Dv NULL .
900.\"
901.It Fn m_cat m n
902Concatenate
903.Fa n
904to
905.Fa m .
906Both
907.Vt mbuf chains
908must be of the same type.
909.Fa n
910is still valid after the function returns.
911.Sy Note :
912It does not handle
913.Dv M_PKTHDR
914and friends.
915.\"
916.It Fn m_split mbuf len how
917Partition an
918.Vt mbuf chain
919in two pieces, returning the tail:
920all but the first
921.Fa len
922bytes.
923In case of failure, it returns
924.Dv NULL
925and attempts to restore the
926.Vt mbuf chain
927to its original state.
928.\"
929.It Fn m_apply mbuf off len f arg
930Apply a function to an
931.Vt mbuf chain ,
932at offset
933.Fa off ,
934for length
935.Fa len
936bytes.
937Typically used to avoid calls to
938.Fn m_pullup
939which would otherwise be unnecessary or undesirable.
940.Fa arg
941is a convenience argument which is passed to the callback function
942.Fa f .
943.Pp
944Each time
945.Fn f
946is called, it will be passed
947.Fa arg ,
948a pointer to the
949.Fa data
950in the current mbuf, and the length
951.Fa len
952of the data in this mbuf to which the function should be applied.
953.Pp
954The function should return zero to indicate success;
955otherwise, if an error is indicated, then
956.Fn m_apply
957will return the error and stop iterating through the
958.Vt mbuf chain .
959.\"
960.It Fn m_getptr mbuf loc off
961Return a pointer to the mbuf containing the data located at
962.Fa loc
963bytes from the beginning of the
964.Vt mbuf chain .
965The corresponding offset into the mbuf will be stored in
966.Fa *off .
967.It Fn m_defrag m0 how
968Defragment an mbuf chain, returning the shortest possible
969chain of mbufs and clusters.
970If allocation fails and this can not be completed,
971.Dv NULL
972will be returned and the original chain will be unchanged.
973Upon success, the original chain will be freed and the new
974chain will be returned.
975.Fa how
976should be either
977.Dv M_WAITOK
978or
979.Dv M_NOWAIT ,
980depending on the caller's preference.
981.Pp
982This function is especially useful in network drivers, where
983certain long mbuf chains must be shortened before being added
984to TX descriptor lists.
985.It Fn m_unshare m0 how
986Create a version of the specified mbuf chain whose
987contents can be safely modified without affecting other users.
988If allocation fails and this operation can not be completed,
989.Dv NULL
990will be returned.
991The original mbuf chain is always reclaimed and the reference
992count of any shared mbuf clusters is decremented.
993.Fa how
994should be either
995.Dv M_WAITOK
996or
997.Dv M_NOWAIT ,
998depending on the caller's preference.
999As a side-effect of this process the returned
1000mbuf chain may be compacted.
1001.Pp
1002This function is especially useful in the transmit path of
1003network code, when data must be encrypted or otherwise
1004altered prior to transmission.
1005.El
1006.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
1007This section currently applies to TCP/IP only.
1008In order to save the host CPU resources, computing checksums is
1009offloaded to the network interface hardware if possible.
1010The
1011.Va m_pkthdr
1012member of the leading
1013.Vt mbuf
1014of a packet contains two fields used for that purpose,
1015.Vt int Va csum_flags
1016and
1017.Vt int Va csum_data .
1018The meaning of those fields depends on the direction a packet flows in,
1019and on whether the packet is fragmented.
1020Henceforth,
1021.Va csum_flags
1022or
1023.Va csum_data
1024of a packet
1025will denote the corresponding field of the
1026.Va m_pkthdr
1027member of the leading
1028.Vt mbuf
1029in the
1030.Vt mbuf chain
1031containing the packet.
1032.Pp
1033On output, checksum offloading is attempted after the outgoing
1034interface has been determined for a packet.
1035The interface-specific field
1036.Va ifnet.if_data.ifi_hwassist
1037(see
1038.Xr ifnet 9 )
1039is consulted for the capabilities of the interface to assist in
1040computing checksums.
1041The
1042.Va csum_flags
1043field of the packet header is set to indicate which actions the interface
1044is supposed to perform on it.
1045The actions unsupported by the network interface are done in the
1046software prior to passing the packet down to the interface driver;
1047such actions will never be requested through
1048.Va csum_flags .
1049.Pp
1050The flags demanding a particular action from an interface are as follows:
1051.Bl -tag -width ".Dv CSUM_TCP" -offset indent
1052.It Dv CSUM_IP
1053The IP header checksum is to be computed and stored in the
1054corresponding field of the packet.
1055The hardware is expected to know the format of an IP header
1056to determine the offset of the IP checksum field.
1057.It Dv CSUM_TCP
1058The TCP checksum is to be computed.
1059(See below.)
1060.It Dv CSUM_UDP
1061The UDP checksum is to be computed.
1062(See below.)
1063.El
1064.Pp
1065Should a TCP or UDP checksum be offloaded to the hardware,
1066the field
1067.Va csum_data
1068will contain the byte offset of the checksum field relative to the
1069end of the IP header.
1070In this case, the checksum field will be initially
1071set by the TCP/IP module to the checksum of the pseudo header
1072defined by the TCP and UDP specifications.
1073.Pp
1074On input, an interface indicates the actions it has performed
1075on a packet by setting one or more of the following flags in
1076.Va csum_flags
1077associated with the packet:
1078.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1079.It Dv CSUM_IP_CHECKED
1080The IP header checksum has been computed.
1081.It Dv CSUM_IP_VALID
1082The IP header has a valid checksum.
1083This flag can appear only in combination with
1084.Dv CSUM_IP_CHECKED .
1085.It Dv CSUM_DATA_VALID
1086The checksum of the data portion of the IP packet has been computed
1087and stored in the field
1088.Va csum_data
1089in network byte order.
1090.It Dv CSUM_PSEUDO_HDR
1091Can be set only along with
1092.Dv CSUM_DATA_VALID
1093to indicate that the IP data checksum found in
1094.Va csum_data
1095allows for the pseudo header defined by the TCP and UDP specifications.
1096Otherwise the checksum of the pseudo header must be calculated by
1097the host CPU and added to
1098.Va csum_data
1099to obtain the final checksum to be used for TCP or UDP validation purposes.
1100.El
1101.Pp
1102If a particular network interface just indicates success or
1103failure of TCP or UDP checksum validation without returning
1104the exact value of the checksum to the host CPU, its driver can mark
1105.Dv CSUM_DATA_VALID
1106and
1107.Dv CSUM_PSEUDO_HDR
1108in
1109.Va csum_flags ,
1110and set
1111.Va csum_data
1112to
1113.Li 0xFFFF
1114hexadecimal to indicate a valid checksum.
1115It is a peculiarity of the algorithm used that the Internet checksum
1116calculated over any valid packet will be
1117.Li 0xFFFF
1118as long as the original checksum field is included.
1119.Sh STRESS TESTING
1120When running a kernel compiled with the option
1121.Dv MBUF_STRESS_TEST ,
1122the following
1123.Xr sysctl 8 Ns
1124-controlled options may be used to create
1125various failure/extreme cases for testing of network drivers
1126and other parts of the kernel that rely on
1127.Vt mbufs .
1128.Bl -tag -width indent
1129.It Va net.inet.ip.mbuf_frag_size
1130Causes
1131.Fn ip_output
1132to fragment outgoing
1133.Vt mbuf chains
1134into fragments of the specified size.
1135Setting this variable to 1 is an excellent way to
1136test the long
1137.Vt mbuf chain
1138handling ability of network drivers.
1139.It Va kern.ipc.m_defragrandomfailures
1140Causes the function
1141.Fn m_defrag
1142to randomly fail, returning
1143.Dv NULL .
1144Any piece of code which uses
1145.Fn m_defrag
1146should be tested with this feature.
1147.El
1148.Sh RETURN VALUES
1149See above.
1150.Sh SEE ALSO
1151.Xr ifnet 9 ,
1152.Xr mbuf_tags 9
1153.Sh HISTORY
1154.\" Please correct me if I'm wrong
1155.Vt Mbufs
1156appeared in an early version of
1157.Bx .
1158Besides being used for network packets, they were used
1159to store various dynamic structures, such as routing table
1160entries, interface addresses, protocol control blocks, etc.
1161In more recent
1162.Fx
1163use of
1164.Vt mbufs
1165is almost entirely limited to packet storage, with
1166.Xr uma 9
1167zones being used directly to store other network-related memory.
1168.Pp
1169Historically, the
1170.Vt mbuf
1171allocator has been a special-purpose memory allocator able to run in
1172interrupt contexts and allocating from a special kernel address space map.
1173As of
1174.Fx 5.3 ,
1175the
1176.Vt mbuf
1177allocator is a wrapper around
1178.Xr uma 9 ,
1179allowing caching of
1180.Vt mbufs ,
1181clusters, and
1182.Vt mbuf
1183+ cluster pairs in per-CPU caches, as well as bringing other benefits of
1184slab allocation.
1185.Sh AUTHORS
1186The original
1187.Nm
1188manual page was written by Yar Tikhiy.
1189The
1190.Xr uma 9
1191.Vt mbuf
1192allocator was written by Bosko Milekic.
1193