xref: /freebsd/share/man/man9/mbuf.9 (revision 0572ccaa4543b0abef8ef81e384c1d04de9f3da1)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd April 30, 2014
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_arg1, void *opt_arg2)"
49.Fa "void *opt_arg1"
50.Fa "void *opt_arg2"
51.Fa "short flags"
52.Fa "int type"
53.Fc
54.Fn MEXTFREE "struct mbuf *mbuf"
55.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
56.\"
57.Ss Mbuf utility macros
58.Fn mtod "struct mbuf *mbuf" "type"
59.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
60.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
61.Ft int
62.Fn M_LEADINGSPACE "struct mbuf *mbuf"
63.Ft int
64.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
65.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
66.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
67.Fn MCHTYPE "struct mbuf *mbuf" "short type"
68.Ft int
69.Fn M_WRITABLE "struct mbuf *mbuf"
70.\"
71.Ss Mbuf allocation functions
72.Ft struct mbuf *
73.Fn m_get "int how" "short type"
74.Ft struct mbuf *
75.Fn m_get2 "int size" "int how" "short type" "int flags"
76.Ft struct mbuf *
77.Fn m_getm "struct mbuf *orig" "int len" "int how" "short type"
78.Ft struct mbuf *
79.Fn m_getjcl "int how" "short type" "int flags" "int size"
80.Ft struct mbuf *
81.Fn m_getcl "int how" "short type" "int flags"
82.Ft struct mbuf *
83.Fn m_getclr "int how" "short type"
84.Ft struct mbuf *
85.Fn m_gethdr "int how" "short type"
86.Ft struct mbuf *
87.Fn m_free "struct mbuf *mbuf"
88.Ft void
89.Fn m_freem "struct mbuf *mbuf"
90.\"
91.Ss Mbuf utility functions
92.Ft void
93.Fn m_adj "struct mbuf *mbuf" "int len"
94.Ft void
95.Fn m_align "struct mbuf *mbuf" "int len"
96.Ft int
97.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
98.Ft struct mbuf *
99.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
100.Ft struct mbuf *
101.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
102.Ft struct mbuf *
103.Fn m_pullup "struct mbuf *mbuf" "int len"
104.Ft struct mbuf *
105.Fn m_pulldown "struct mbuf *mbuf" "int offset" "int len" "int *offsetp"
106.Ft struct mbuf *
107.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
108.Ft struct mbuf *
109.Fn m_copypacket "struct mbuf *mbuf" "int how"
110.Ft struct mbuf *
111.Fn m_dup "struct mbuf *mbuf" "int how"
112.Ft void
113.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
114.Ft void
115.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
116.Ft struct mbuf *
117.Fo m_devget
118.Fa "char *buf"
119.Fa "int len"
120.Fa "int offset"
121.Fa "struct ifnet *ifp"
122.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
123.Fc
124.Ft void
125.Fn m_cat "struct mbuf *m" "struct mbuf *n"
126.Ft u_int
127.Fn m_fixhdr "struct mbuf *mbuf"
128.Ft void
129.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
130.Ft void
131.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
132.Ft u_int
133.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
134.Ft struct mbuf *
135.Fn m_split "struct mbuf *mbuf" "int len" "int how"
136.Ft int
137.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
138.Ft struct mbuf *
139.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
140.Ft struct mbuf *
141.Fn m_defrag "struct mbuf *m0" "int how"
142.Ft struct mbuf *
143.Fn m_unshare "struct mbuf *m0" "int how"
144.\"
145.Sh DESCRIPTION
146An
147.Vt mbuf
148is a basic unit of memory management in the kernel IPC subsystem.
149Network packets and socket buffers are stored in
150.Vt mbufs .
151A network packet may span multiple
152.Vt mbufs
153arranged into a
154.Vt mbuf chain
155(linked list),
156which allows adding or trimming
157network headers with little overhead.
158.Pp
159While a developer should not bother with
160.Vt mbuf
161internals without serious
162reason in order to avoid incompatibilities with future changes, it
163is useful to understand the general structure of an
164.Vt mbuf .
165.Pp
166An
167.Vt mbuf
168consists of a variable-sized header and a small internal
169buffer for data.
170The total size of an
171.Vt mbuf ,
172.Dv MSIZE ,
173is a constant defined in
174.In sys/param.h .
175The
176.Vt mbuf
177header includes:
178.Bl -tag -width "m_nextpkt" -offset indent
179.It Va m_next
180.Pq Vt struct mbuf *
181A pointer to the next
182.Vt mbuf
183in the
184.Vt mbuf chain .
185.It Va m_nextpkt
186.Pq Vt struct mbuf *
187A pointer to the next
188.Vt mbuf chain
189in the queue.
190.It Va m_data
191.Pq Vt caddr_t
192A pointer to data attached to this
193.Vt mbuf .
194.It Va m_len
195.Pq Vt int
196The length of the data.
197.It Va m_type
198.Pq Vt short
199The type of the data.
200.It Va m_flags
201.Pq Vt int
202The
203.Vt mbuf
204flags.
205.El
206.Pp
207The
208.Vt mbuf
209flag bits are defined as follows:
210.Bd -literal
211/* mbuf flags */
212#define	M_EXT		0x0001	/* has associated external storage */
213#define	M_PKTHDR	0x0002	/* start of record */
214#define	M_EOR		0x0004	/* end of record */
215#define	M_RDONLY	0x0008	/* associated data marked read-only */
216#define	M_PROTO1	0x0010	/* protocol-specific */
217#define	M_PROTO2	0x0020	/* protocol-specific */
218#define	M_PROTO3	0x0040	/* protocol-specific */
219#define	M_PROTO4	0x0080	/* protocol-specific */
220#define	M_PROTO5	0x0100	/* protocol-specific */
221#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
222#define	M_FREELIST	0x8000	/* mbuf is on the free list */
223
224/* mbuf pkthdr flags (also stored in m_flags) */
225#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
226#define	M_MCAST		0x0400	/* send/received as link-level multicast */
227#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
228#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
229#define	M_LASTFRAG	0x2000	/* packet is last fragment */
230.Ed
231.Pp
232The available
233.Vt mbuf
234types are defined as follows:
235.Bd -literal
236/* mbuf types */
237#define	MT_DATA		1	/* dynamic (data) allocation */
238#define	MT_HEADER	MT_DATA	/* packet header */
239#define	MT_SONAME	8	/* socket name */
240#define	MT_CONTROL	14	/* extra-data protocol message */
241#define	MT_OOBDATA	15	/* expedited data */
242.Ed
243.Pp
244The available external buffer types are defined as follows:
245.Bd -literal
246/* external buffer types */
247#define EXT_CLUSTER	1	/* mbuf cluster */
248#define EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
249#define EXT_JUMBOP	3	/* jumbo cluster 4096 bytes */
250#define EXT_JUMBO9	4	/* jumbo cluster 9216 bytes */
251#define EXT_JUMBO16	5	/* jumbo cluster 16384 bytes */
252#define EXT_PACKET	6	/* mbuf+cluster from packet zone */
253#define EXT_MBUF	7	/* external mbuf reference (M_IOVEC) */
254#define EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
255#define EXT_MOD_TYPE	200	/* custom module's ext_buf type */
256#define EXT_DISPOSABLE	300	/* can throw this buffer away w/page flipping */
257#define EXT_EXTREF	400	/* has externally maintained ref_cnt ptr */
258.Ed
259.Pp
260If the
261.Dv M_PKTHDR
262flag is set, a
263.Vt struct pkthdr Va m_pkthdr
264is added to the
265.Vt mbuf
266header.
267It contains a pointer to the interface
268the packet has been received from
269.Pq Vt struct ifnet Va *rcvif ,
270and the total packet length
271.Pq Vt int Va len .
272Optionally, it may also contain an attached list of packet tags
273.Pq Vt "struct m_tag" .
274See
275.Xr mbuf_tags 9
276for details.
277Fields used in offloading checksum calculation to the hardware are kept in
278.Va m_pkthdr
279as well.
280See
281.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
282for details.
283.Pp
284If small enough, data is stored in the internal data buffer of an
285.Vt mbuf .
286If the data is sufficiently large, another
287.Vt mbuf
288may be added to the
289.Vt mbuf chain ,
290or external storage may be associated with the
291.Vt mbuf .
292.Dv MHLEN
293bytes of data can fit into an
294.Vt mbuf
295with the
296.Dv M_PKTHDR
297flag set,
298.Dv MLEN
299bytes can otherwise.
300.Pp
301If external storage is being associated with an
302.Vt mbuf ,
303the
304.Va m_ext
305header is added at the cost of losing the internal data buffer.
306It includes a pointer to external storage, the size of the storage,
307a pointer to a function used for freeing the storage,
308a pointer to an optional argument that can be passed to the function,
309and a pointer to a reference counter.
310An
311.Vt mbuf
312using external storage has the
313.Dv M_EXT
314flag set.
315.Pp
316The system supplies a macro for allocating the desired external storage
317buffer,
318.Dv MEXTADD .
319.Pp
320The allocation and management of the reference counter is handled by the
321subsystem.
322.Pp
323The system also supplies a default type of external storage buffer called an
324.Vt mbuf cluster .
325.Vt Mbuf clusters
326can be allocated and configured with the use of the
327.Dv MCLGET
328macro.
329Each
330.Vt mbuf cluster
331is
332.Dv MCLBYTES
333in size, where MCLBYTES is a machine-dependent constant.
334The system defines an advisory macro
335.Dv MINCLSIZE ,
336which is the smallest amount of data to put into an
337.Vt mbuf cluster .
338It is equal to
339.Dv MHLEN
340plus one.
341It is typically preferable to store data into the data region of an
342.Vt mbuf ,
343if size permits, as opposed to allocating a separate
344.Vt mbuf cluster
345to hold the same data.
346.\"
347.Ss Macros and Functions
348There are numerous predefined macros and functions that provide the
349developer with common utilities.
350.\"
351.Bl -ohang -offset indent
352.It Fn mtod mbuf type
353Convert an
354.Fa mbuf
355pointer to a data pointer.
356The macro expands to the data pointer cast to the pointer of the specified
357.Fa type .
358.Sy Note :
359It is advisable to ensure that there is enough contiguous data in
360.Fa mbuf .
361See
362.Fn m_pullup
363for details.
364.It Fn MGET mbuf how type
365Allocate an
366.Vt mbuf
367and initialize it to contain internal data.
368.Fa mbuf
369will point to the allocated
370.Vt mbuf
371on success, or be set to
372.Dv NULL
373on failure.
374The
375.Fa how
376argument is to be set to
377.Dv M_WAITOK
378or
379.Dv M_NOWAIT .
380It specifies whether the caller is willing to block if necessary.
381A number of other functions and macros related to
382.Vt mbufs
383have the same argument because they may
384at some point need to allocate new
385.Vt mbufs .
386.It Fn MGETHDR mbuf how type
387Allocate an
388.Vt mbuf
389and initialize it to contain a packet header
390and internal data.
391See
392.Fn MGET
393for details.
394.It Fn MEXTADD mbuf buf size free opt_arg1 opt_arg2 flags type
395Associate externally managed data with
396.Fa mbuf .
397Any internal data contained in the mbuf will be discarded, and the
398.Dv M_EXT
399flag will be set.
400The
401.Fa buf
402and
403.Fa size
404arguments are the address and length, respectively, of the data.
405The
406.Fa free
407argument points to a function which will be called to free the data
408when the mbuf is freed; it is only used if
409.Fa type
410is
411.Dv EXT_EXTREF .
412The
413.Fa opt_arg1
414and
415.Fa opt_arg2
416arguments will be passed unmodified to
417.Fa free .
418The
419.Fa flags
420argument specifies additional
421.Vt mbuf
422flags; it is not necessary to specify
423.Dv M_EXT .
424Finally, the
425.Fa type
426argument specifies the type of external data, which controls how it
427will be disposed of when the
428.Vt mbuf
429is freed.
430In most cases, the correct value is
431.Dv EXT_EXTREF .
432.It Fn MCLGET mbuf how
433Allocate and attach an
434.Vt mbuf cluster
435to
436.Fa mbuf .
437If the macro fails, the
438.Dv M_EXT
439flag will not be set in
440.Fa mbuf .
441.It Fn M_ALIGN mbuf len
442Set the pointer
443.Fa mbuf->m_data
444to place an object of the size
445.Fa len
446at the end of the internal data area of
447.Fa mbuf ,
448long word aligned.
449Applicable only if
450.Fa mbuf
451is newly allocated with
452.Fn MGET
453or
454.Fn m_get .
455.It Fn MH_ALIGN mbuf len
456Serves the same purpose as
457.Fn M_ALIGN
458does, but only for
459.Fa mbuf
460newly allocated with
461.Fn MGETHDR
462or
463.Fn m_gethdr ,
464or initialized by
465.Fn m_dup_pkthdr
466or
467.Fn m_move_pkthdr .
468.It Fn m_align mbuf len
469Serves the same purpose as
470.Fn M_ALIGN
471but handles any type of mbuf.
472.It Fn M_LEADINGSPACE mbuf
473Returns the number of bytes available before the beginning
474of data in
475.Fa mbuf .
476.It Fn M_TRAILINGSPACE mbuf
477Returns the number of bytes available after the end of data in
478.Fa mbuf .
479.It Fn M_PREPEND mbuf len how
480This macro operates on an
481.Vt mbuf chain .
482It is an optimized wrapper for
483.Fn m_prepend
484that can make use of possible empty space before data
485(e.g.\& left after trimming of a link-layer header).
486The new
487.Vt mbuf chain
488pointer or
489.Dv NULL
490is in
491.Fa mbuf
492after the call.
493.It Fn M_MOVE_PKTHDR to from
494Using this macro is equivalent to calling
495.Fn m_move_pkthdr to from .
496.It Fn M_WRITABLE mbuf
497This macro will evaluate true if
498.Fa mbuf
499is not marked
500.Dv M_RDONLY
501and if either
502.Fa mbuf
503does not contain external storage or,
504if it does,
505then if the reference count of the storage is not greater than 1.
506The
507.Dv M_RDONLY
508flag can be set in
509.Fa mbuf->m_flags .
510This can be achieved during setup of the external storage,
511by passing the
512.Dv M_RDONLY
513bit as a
514.Fa flags
515argument to the
516.Fn MEXTADD
517macro, or can be directly set in individual
518.Vt mbufs .
519.It Fn MCHTYPE mbuf type
520Change the type of
521.Fa mbuf
522to
523.Fa type .
524This is a relatively expensive operation and should be avoided.
525.El
526.Pp
527The functions are:
528.Bl -ohang -offset indent
529.It Fn m_get how type
530A function version of
531.Fn MGET
532for non-critical paths.
533.It Fn m_get2 size how type flags
534Allocate an
535.Vt mbuf
536with enough space to hold the specified amount of data.
537.It Fn m_getm orig len how type
538Allocate
539.Fa len
540bytes worth of
541.Vt mbufs
542and
543.Vt mbuf clusters
544if necessary and append the resulting allocated
545.Vt mbuf chain
546to the
547.Vt mbuf chain
548.Fa orig ,
549if it is
550.No non- Ns Dv NULL .
551If the allocation fails at any point,
552free whatever was allocated and return
553.Dv NULL .
554If
555.Fa orig
556is
557.No non- Ns Dv NULL ,
558it will not be freed.
559It is possible to use
560.Fn m_getm
561to either append
562.Fa len
563bytes to an existing
564.Vt mbuf
565or
566.Vt mbuf chain
567(for example, one which may be sitting in a pre-allocated ring)
568or to simply perform an all-or-nothing
569.Vt mbuf
570and
571.Vt mbuf cluster
572allocation.
573.It Fn m_gethdr how type
574A function version of
575.Fn MGETHDR
576for non-critical paths.
577.It Fn m_getcl how type flags
578Fetch an
579.Vt mbuf
580with an
581.Vt mbuf cluster
582attached to it.
583If one of the allocations fails, the entire allocation fails.
584This routine is the preferred way of fetching both the
585.Vt mbuf
586and
587.Vt mbuf cluster
588together, as it avoids having to unlock/relock between allocations.
589Returns
590.Dv NULL
591on failure.
592.It Fn m_getjcl how type flags size
593This is like
594.Fn m_getcl
595but the size of the cluster allocated will be large enough for
596.Fa size
597bytes.
598.It Fn m_getclr how type
599Allocate an
600.Vt mbuf
601and zero out the data region.
602.It Fn m_free mbuf
603Frees
604.Vt mbuf .
605Returns
606.Va m_next
607of the freed
608.Vt mbuf .
609.El
610.Pp
611The functions below operate on
612.Vt mbuf chains .
613.Bl -ohang -offset indent
614.It Fn m_freem mbuf
615Free an entire
616.Vt mbuf chain ,
617including any external storage.
618.\"
619.It Fn m_adj mbuf len
620Trim
621.Fa len
622bytes from the head of an
623.Vt mbuf chain
624if
625.Fa len
626is positive, from the tail otherwise.
627.\"
628.It Fn m_append mbuf len cp
629Append
630.Fa len
631bytes of data
632.Fa cp
633to the
634.Vt mbuf chain .
635Extend the mbuf chain if the new data does not fit in
636existing space.
637.\"
638.It Fn m_prepend mbuf len how
639Allocate a new
640.Vt mbuf
641and prepend it to the
642.Vt mbuf chain ,
643handling
644.Dv M_PKTHDR
645properly.
646.Sy Note :
647It does not allocate any
648.Vt mbuf clusters ,
649so
650.Fa len
651must be less than
652.Dv MLEN
653or
654.Dv MHLEN ,
655depending on the
656.Dv M_PKTHDR
657flag setting.
658.\"
659.It Fn m_copyup mbuf len dstoff
660Similar to
661.Fn m_pullup
662but copies
663.Fa len
664bytes of data into a new mbuf at
665.Fa dstoff
666bytes into the mbuf.
667The
668.Fa dstoff
669argument aligns the data and leaves room for a link layer header.
670Returns the new
671.Vt mbuf chain
672on success,
673and frees the
674.Vt mbuf chain
675and returns
676.Dv NULL
677on failure.
678.Sy Note :
679The function does not allocate
680.Vt mbuf clusters ,
681so
682.Fa len + dstoff
683must be less than
684.Dv MHLEN .
685.\"
686.It Fn m_pullup mbuf len
687Arrange that the first
688.Fa len
689bytes of an
690.Vt mbuf chain
691are contiguous and lie in the data area of
692.Fa mbuf ,
693so they are accessible with
694.Fn mtod mbuf type .
695It is important to remember that this may involve
696reallocating some mbufs and moving data so all pointers
697referencing data within the old mbuf chain
698must be recalculated or made invalid.
699Return the new
700.Vt mbuf chain
701on success,
702.Dv NULL
703on failure
704(the
705.Vt mbuf chain
706is freed in this case).
707.Sy Note :
708It does not allocate any
709.Vt mbuf clusters ,
710so
711.Fa len
712must be less than or equal to
713.Dv MHLEN .
714.\"
715.It Fn m_pulldown mbuf offset len offsetp
716Arrange that
717.Fa len
718bytes between
719.Fa offset
720and
721.Fa offset + len
722in the
723.Vt mbuf chain
724are contiguous and lie in the data area of
725.Fa mbuf ,
726so they are accessible with
727.Fn mtod mbuf type .
728.Fa len
729must be smaller than, or equal to, the size of an
730.Vt mbuf cluster .
731Return a pointer to an intermediate
732.Vt mbuf
733in the chain containing the requested region;
734the offset in the data region of the
735.Vt mbuf chain
736to the data contained in the returned mbuf is stored in
737.Fa *offsetp .
738If
739.Fa offsetp
740is NULL, the region may be accessed using
741.Fn mtod mbuf type .
742If
743.Fa offsetp
744is non-NULL, the region may be accessed using
745.Fn mtod mbuf uint8_t + *offsetp .
746The region of the mbuf chain between its beginning and
747.Fa offset
748is not modified, therefore it is safe to hold pointers to data within
749this region before calling
750.Fn m_pulldown .
751.\"
752.It Fn m_copym mbuf offset len how
753Make a copy of an
754.Vt mbuf chain
755starting
756.Fa offset
757bytes from the beginning, continuing for
758.Fa len
759bytes.
760If
761.Fa len
762is
763.Dv M_COPYALL ,
764copy to the end of the
765.Vt mbuf chain .
766.Sy Note :
767The copy is read-only, because the
768.Vt mbuf clusters
769are not copied, only their reference counts are incremented.
770.\"
771.It Fn m_copypacket mbuf how
772Copy an entire packet including header, which must be present.
773This is an optimized version of the common case
774.Fn m_copym mbuf 0 M_COPYALL how .
775.Sy Note :
776the copy is read-only, because the
777.Vt mbuf clusters
778are not copied, only their reference counts are incremented.
779.\"
780.It Fn m_dup mbuf how
781Copy a packet header
782.Vt mbuf chain
783into a completely new
784.Vt mbuf chain ,
785including copying any
786.Vt mbuf clusters .
787Use this instead of
788.Fn m_copypacket
789when you need a writable copy of an
790.Vt mbuf chain .
791.\"
792.It Fn m_copydata mbuf offset len buf
793Copy data from an
794.Vt mbuf chain
795starting
796.Fa offset
797bytes from the beginning, continuing for
798.Fa len
799bytes, into the indicated buffer
800.Fa buf .
801.\"
802.It Fn m_copyback mbuf offset len buf
803Copy
804.Fa len
805bytes from the buffer
806.Fa buf
807back into the indicated
808.Vt mbuf chain ,
809starting at
810.Fa offset
811bytes from the beginning of the
812.Vt mbuf chain ,
813extending the
814.Vt mbuf chain
815if necessary.
816.Sy Note :
817It does not allocate any
818.Vt mbuf clusters ,
819just adds
820.Vt mbufs
821to the
822.Vt mbuf chain .
823It is safe to set
824.Fa offset
825beyond the current
826.Vt mbuf chain
827end: zeroed
828.Vt mbufs
829will be allocated to fill the space.
830.\"
831.It Fn m_length mbuf last
832Return the length of the
833.Vt mbuf chain ,
834and optionally a pointer to the last
835.Vt mbuf .
836.\"
837.It Fn m_dup_pkthdr to from how
838Upon the function's completion, the
839.Vt mbuf
840.Fa to
841will contain an identical copy of
842.Fa from->m_pkthdr
843and the per-packet attributes found in the
844.Vt mbuf chain
845.Fa from .
846The
847.Vt mbuf
848.Fa from
849must have the flag
850.Dv M_PKTHDR
851initially set, and
852.Fa to
853must be empty on entry.
854.\"
855.It Fn m_move_pkthdr to from
856Move
857.Va m_pkthdr
858and the per-packet attributes from the
859.Vt mbuf chain
860.Fa from
861to the
862.Vt mbuf
863.Fa to .
864The
865.Vt mbuf
866.Fa from
867must have the flag
868.Dv M_PKTHDR
869initially set, and
870.Fa to
871must be empty on entry.
872Upon the function's completion,
873.Fa from
874will have the flag
875.Dv M_PKTHDR
876and the per-packet attributes cleared.
877.\"
878.It Fn m_fixhdr mbuf
879Set the packet-header length to the length of the
880.Vt mbuf chain .
881.\"
882.It Fn m_devget buf len offset ifp copy
883Copy data from device local memory pointed to by
884.Fa buf
885to an
886.Vt mbuf chain .
887The copy is done using a specified copy routine
888.Fa copy ,
889or
890.Fn bcopy
891if
892.Fa copy
893is
894.Dv NULL .
895.\"
896.It Fn m_cat m n
897Concatenate
898.Fa n
899to
900.Fa m .
901Both
902.Vt mbuf chains
903must be of the same type.
904.Fa n
905is still valid after the function returns.
906.Sy Note :
907It does not handle
908.Dv M_PKTHDR
909and friends.
910.\"
911.It Fn m_split mbuf len how
912Partition an
913.Vt mbuf chain
914in two pieces, returning the tail:
915all but the first
916.Fa len
917bytes.
918In case of failure, it returns
919.Dv NULL
920and attempts to restore the
921.Vt mbuf chain
922to its original state.
923.\"
924.It Fn m_apply mbuf off len f arg
925Apply a function to an
926.Vt mbuf chain ,
927at offset
928.Fa off ,
929for length
930.Fa len
931bytes.
932Typically used to avoid calls to
933.Fn m_pullup
934which would otherwise be unnecessary or undesirable.
935.Fa arg
936is a convenience argument which is passed to the callback function
937.Fa f .
938.Pp
939Each time
940.Fn f
941is called, it will be passed
942.Fa arg ,
943a pointer to the
944.Fa data
945in the current mbuf, and the length
946.Fa len
947of the data in this mbuf to which the function should be applied.
948.Pp
949The function should return zero to indicate success;
950otherwise, if an error is indicated, then
951.Fn m_apply
952will return the error and stop iterating through the
953.Vt mbuf chain .
954.\"
955.It Fn m_getptr mbuf loc off
956Return a pointer to the mbuf containing the data located at
957.Fa loc
958bytes from the beginning of the
959.Vt mbuf chain .
960The corresponding offset into the mbuf will be stored in
961.Fa *off .
962.It Fn m_defrag m0 how
963Defragment an mbuf chain, returning the shortest possible
964chain of mbufs and clusters.
965If allocation fails and this cannot be completed,
966.Dv NULL
967will be returned and the original chain will be unchanged.
968Upon success, the original chain will be freed and the new
969chain will be returned.
970.Fa how
971should be either
972.Dv M_WAITOK
973or
974.Dv M_NOWAIT ,
975depending on the caller's preference.
976.Pp
977This function is especially useful in network drivers, where
978certain long mbuf chains must be shortened before being added
979to TX descriptor lists.
980.It Fn m_unshare m0 how
981Create a version of the specified mbuf chain whose
982contents can be safely modified without affecting other users.
983If allocation fails and this operation cannot be completed,
984.Dv NULL
985will be returned.
986The original mbuf chain is always reclaimed and the reference
987count of any shared mbuf clusters is decremented.
988.Fa how
989should be either
990.Dv M_WAITOK
991or
992.Dv M_NOWAIT ,
993depending on the caller's preference.
994As a side-effect of this process the returned
995mbuf chain may be compacted.
996.Pp
997This function is especially useful in the transmit path of
998network code, when data must be encrypted or otherwise
999altered prior to transmission.
1000.El
1001.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
1002This section currently applies to TCP/IP only.
1003In order to save the host CPU resources, computing checksums is
1004offloaded to the network interface hardware if possible.
1005The
1006.Va m_pkthdr
1007member of the leading
1008.Vt mbuf
1009of a packet contains two fields used for that purpose,
1010.Vt int Va csum_flags
1011and
1012.Vt int Va csum_data .
1013The meaning of those fields depends on the direction a packet flows in,
1014and on whether the packet is fragmented.
1015Henceforth,
1016.Va csum_flags
1017or
1018.Va csum_data
1019of a packet
1020will denote the corresponding field of the
1021.Va m_pkthdr
1022member of the leading
1023.Vt mbuf
1024in the
1025.Vt mbuf chain
1026containing the packet.
1027.Pp
1028On output, checksum offloading is attempted after the outgoing
1029interface has been determined for a packet.
1030The interface-specific field
1031.Va ifnet.if_data.ifi_hwassist
1032(see
1033.Xr ifnet 9 )
1034is consulted for the capabilities of the interface to assist in
1035computing checksums.
1036The
1037.Va csum_flags
1038field of the packet header is set to indicate which actions the interface
1039is supposed to perform on it.
1040The actions unsupported by the network interface are done in the
1041software prior to passing the packet down to the interface driver;
1042such actions will never be requested through
1043.Va csum_flags .
1044.Pp
1045The flags demanding a particular action from an interface are as follows:
1046.Bl -tag -width ".Dv CSUM_TCP" -offset indent
1047.It Dv CSUM_IP
1048The IP header checksum is to be computed and stored in the
1049corresponding field of the packet.
1050The hardware is expected to know the format of an IP header
1051to determine the offset of the IP checksum field.
1052.It Dv CSUM_TCP
1053The TCP checksum is to be computed.
1054(See below.)
1055.It Dv CSUM_UDP
1056The UDP checksum is to be computed.
1057(See below.)
1058.El
1059.Pp
1060Should a TCP or UDP checksum be offloaded to the hardware,
1061the field
1062.Va csum_data
1063will contain the byte offset of the checksum field relative to the
1064end of the IP header.
1065In this case, the checksum field will be initially
1066set by the TCP/IP module to the checksum of the pseudo header
1067defined by the TCP and UDP specifications.
1068.Pp
1069On input, an interface indicates the actions it has performed
1070on a packet by setting one or more of the following flags in
1071.Va csum_flags
1072associated with the packet:
1073.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1074.It Dv CSUM_IP_CHECKED
1075The IP header checksum has been computed.
1076.It Dv CSUM_IP_VALID
1077The IP header has a valid checksum.
1078This flag can appear only in combination with
1079.Dv CSUM_IP_CHECKED .
1080.It Dv CSUM_DATA_VALID
1081The checksum of the data portion of the IP packet has been computed
1082and stored in the field
1083.Va csum_data
1084in network byte order.
1085.It Dv CSUM_PSEUDO_HDR
1086Can be set only along with
1087.Dv CSUM_DATA_VALID
1088to indicate that the IP data checksum found in
1089.Va csum_data
1090allows for the pseudo header defined by the TCP and UDP specifications.
1091Otherwise the checksum of the pseudo header must be calculated by
1092the host CPU and added to
1093.Va csum_data
1094to obtain the final checksum to be used for TCP or UDP validation purposes.
1095.El
1096.Pp
1097If a particular network interface just indicates success or
1098failure of TCP or UDP checksum validation without returning
1099the exact value of the checksum to the host CPU, its driver can mark
1100.Dv CSUM_DATA_VALID
1101and
1102.Dv CSUM_PSEUDO_HDR
1103in
1104.Va csum_flags ,
1105and set
1106.Va csum_data
1107to
1108.Li 0xFFFF
1109hexadecimal to indicate a valid checksum.
1110It is a peculiarity of the algorithm used that the Internet checksum
1111calculated over any valid packet will be
1112.Li 0xFFFF
1113as long as the original checksum field is included.
1114.Sh STRESS TESTING
1115When running a kernel compiled with the option
1116.Dv MBUF_STRESS_TEST ,
1117the following
1118.Xr sysctl 8 Ns
1119-controlled options may be used to create
1120various failure/extreme cases for testing of network drivers
1121and other parts of the kernel that rely on
1122.Vt mbufs .
1123.Bl -tag -width ident
1124.It Va net.inet.ip.mbuf_frag_size
1125Causes
1126.Fn ip_output
1127to fragment outgoing
1128.Vt mbuf chains
1129into fragments of the specified size.
1130Setting this variable to 1 is an excellent way to
1131test the long
1132.Vt mbuf chain
1133handling ability of network drivers.
1134.It Va kern.ipc.m_defragrandomfailures
1135Causes the function
1136.Fn m_defrag
1137to randomly fail, returning
1138.Dv NULL .
1139Any piece of code which uses
1140.Fn m_defrag
1141should be tested with this feature.
1142.El
1143.Sh RETURN VALUES
1144See above.
1145.Sh SEE ALSO
1146.Xr ifnet 9 ,
1147.Xr mbuf_tags 9
1148.Sh HISTORY
1149.\" Please correct me if I'm wrong
1150.Vt Mbufs
1151appeared in an early version of
1152.Bx .
1153Besides being used for network packets, they were used
1154to store various dynamic structures, such as routing table
1155entries, interface addresses, protocol control blocks, etc.
1156In more recent
1157.Fx
1158use of
1159.Vt mbufs
1160is almost entirely limited to packet storage, with
1161.Xr uma 9
1162zones being used directly to store other network-related memory.
1163.Pp
1164Historically, the
1165.Vt mbuf
1166allocator has been a special-purpose memory allocator able to run in
1167interrupt contexts and allocating from a special kernel address space map.
1168As of
1169.Fx 5.3 ,
1170the
1171.Vt mbuf
1172allocator is a wrapper around
1173.Xr uma 9 ,
1174allowing caching of
1175.Vt mbufs ,
1176clusters, and
1177.Vt mbuf
1178+ cluster pairs in per-CPU caches, as well as bringing other benefits of
1179slab allocation.
1180.Sh AUTHORS
1181The original
1182.Nm
1183manual page was written by Yar Tikhiy.
1184The
1185.Xr uma 9
1186.Vt mbuf
1187allocator was written by Bosko Milekic.
1188