xref: /freebsd/share/man/man9/mbuf.9 (revision b6de9e91bd2c47efaeec72a08642f8fd99cc7b20)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd August 7, 2005
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft void
94.Fn m_align "struct mbuf *mbuf" "int len"
95.Ft int
96.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
97.Ft struct mbuf *
98.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
99.Ft struct mbuf *
100.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
101.Ft struct mbuf *
102.Fn m_pullup "struct mbuf *mbuf" "int len"
103.Ft struct mbuf *
104.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
105.Ft struct mbuf *
106.Fn m_copypacket "struct mbuf *mbuf" "int how"
107.Ft struct mbuf *
108.Fn m_dup "struct mbuf *mbuf" "int how"
109.Ft void
110.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
111.Ft void
112.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
113.Ft struct mbuf *
114.Fo m_devget
115.Fa "char *buf"
116.Fa "int len"
117.Fa "int offset"
118.Fa "struct ifnet *ifp"
119.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
120.Fc
121.Ft void
122.Fn m_cat "struct mbuf *m" "struct mbuf *n"
123.Ft u_int
124.Fn m_fixhdr "struct mbuf *mbuf"
125.Ft int
126.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
127.Ft void
128.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
129.Ft u_int
130.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
131.Ft struct mbuf *
132.Fn m_split "struct mbuf *mbuf" "int len" "int how"
133.Ft int
134.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
135.Ft struct mbuf *
136.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
137.Ft struct mbuf *
138.Fn m_defrag "struct mbuf *m0" "int how"
139.\"
140.Sh DESCRIPTION
141An
142.Vt mbuf
143is a basic unit of memory management in the kernel IPC subsystem.
144Network packets and socket buffers are stored in
145.Vt mbufs .
146A network packet may span multiple
147.Vt mbufs
148arranged into a
149.Vt mbuf chain
150(linked list),
151which allows adding or trimming
152network headers with little overhead.
153.Pp
154While a developer should not bother with
155.Vt mbuf
156internals without serious
157reason in order to avoid incompatibilities with future changes, it
158is useful to understand the general structure of an
159.Vt mbuf .
160.Pp
161An
162.Vt mbuf
163consists of a variable-sized header and a small internal
164buffer for data.
165The total size of an
166.Vt mbuf ,
167.Dv MSIZE ,
168is a constant defined in
169.In sys/param.h .
170The
171.Vt mbuf
172header includes:
173.Pp
174.Bl -tag -width "m_nextpkt" -offset indent
175.It Va m_next
176.Pq Vt struct mbuf *
177A pointer to the next
178.Vt mbuf
179in the
180.Vt mbuf chain .
181.It Va m_nextpkt
182.Pq Vt struct mbuf *
183A pointer to the next
184.Vt mbuf chain
185in the queue.
186.It Va m_data
187.Pq Vt caddr_t
188A pointer to data attached to this
189.Vt mbuf .
190.It Va m_len
191.Pq Vt int
192The length of the data.
193.It Va m_type
194.Pq Vt short
195The type of the data.
196.It Va m_flags
197.Pq Vt int
198The
199.Vt mbuf
200flags.
201.El
202.Pp
203The
204.Vt mbuf
205flag bits are defined as follows:
206.Bd -literal
207/* mbuf flags */
208#define	M_EXT		0x0001	/* has associated external storage */
209#define	M_PKTHDR	0x0002	/* start of record */
210#define	M_EOR		0x0004	/* end of record */
211#define	M_RDONLY	0x0008	/* associated data marked read-only */
212#define	M_PROTO1	0x0010	/* protocol-specific */
213#define	M_PROTO2	0x0020 	/* protocol-specific */
214#define	M_PROTO3	0x0040	/* protocol-specific */
215#define	M_PROTO4	0x0080	/* protocol-specific */
216#define	M_PROTO5	0x0100	/* protocol-specific */
217#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
218#define	M_FREELIST	0x8000	/* mbuf is on the free list */
219
220/* mbuf pkthdr flags (also stored in m_flags) */
221#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
222#define	M_MCAST		0x0400	/* send/received as link-level multicast */
223#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
224#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
225#define	M_LASTFRAG	0x2000	/* packet is last fragment */
226.Ed
227.Pp
228The available
229.Vt mbuf
230types are defined as follows:
231.Bd -literal
232/* mbuf types */
233#define	MT_DATA		1	/* dynamic (data) allocation */
234#define	MT_HEADER	2	/* packet header */
235#define	MT_SONAME	8	/* socket name */
236#define	MT_FTABLE	11	/* fragment reassembly header */
237#define	MT_CONTROL	14	/* extra-data protocol message */
238#define	MT_OOBDATA	15	/* expedited data */
239.Ed
240.Pp
241If the
242.Dv M_PKTHDR
243flag is set, a
244.Vt struct pkthdr Va m_pkthdr
245is added to the
246.Vt mbuf
247header.
248It contains a pointer to the interface
249the packet has been received from
250.Pq Vt struct ifnet Va *rcvif ,
251and the total packet length
252.Pq Vt int Va len .
253Optionally, it may also contain an attached list of packet tags
254.Pq Vt "struct m_tag" .
255See
256.Xr mbuf_tags 9
257for details.
258Fields used in offloading checksum calculation to the hardware are kept in
259.Va m_pkthdr
260as well.
261See
262.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
263for details.
264.Pp
265If small enough, data is stored in the internal data buffer of an
266.Vt mbuf .
267If the data is sufficiently large, another
268.Vt mbuf
269may be added to the
270.Vt mbuf chain ,
271or external storage may be associated with the
272.Vt mbuf .
273.Dv MHLEN
274bytes of data can fit into an
275.Vt mbuf
276with the
277.Dv M_PKTHDR
278flag set,
279.Dv MLEN
280bytes can otherwise.
281.Pp
282If external storage is being associated with an
283.Vt mbuf ,
284the
285.Va m_ext
286header is added at the cost of losing the internal data buffer.
287It includes a pointer to external storage, the size of the storage,
288a pointer to a function used for freeing the storage,
289a pointer to an optional argument that can be passed to the function,
290and a pointer to a reference counter.
291An
292.Vt mbuf
293using external storage has the
294.Dv M_EXT
295flag set.
296.Pp
297The system supplies a macro for allocating the desired external storage
298buffer,
299.Dv MEXTADD .
300.Pp
301The allocation and management of the reference counter is handled by the
302subsystem.
303The developer can check whether the reference count for the
304external storage of a given
305.Vt mbuf
306is greater than 1 with the
307.Dv MEXT_IS_REF
308macro.
309Similarly, the developer can directly add and remove references,
310if absolutely necessary, with the use of the
311.Dv MEXT_ADD_REF
312and
313.Dv MEXT_REM_REF
314macros.
315.Pp
316The system also supplies a default type of external storage buffer called an
317.Vt mbuf cluster .
318.Vt Mbuf clusters
319can be allocated and configured with the use of the
320.Dv MCLGET
321macro.
322Each
323.Vt mbuf cluster
324is
325.Dv MCLBYTES
326in size, where MCLBYTES is a machine-dependent constant.
327The system defines an advisory macro
328.Dv MINCLSIZE ,
329which is the smallest amount of data to put into an
330.Vt mbuf cluster .
331It is equal to the sum of
332.Dv MLEN
333and
334.Dv MHLEN .
335It is typically preferable to store data into the data region of an
336.Vt mbuf ,
337if size permits, as opposed to allocating a separate
338.Vt mbuf cluster
339to hold the same data.
340.\"
341.Ss Macros and Functions
342There are numerous predefined macros and functions that provide the
343developer with common utilities.
344.\"
345.Bl -ohang -offset indent
346.It Fn mtod mbuf type
347Convert an
348.Fa mbuf
349pointer to a data pointer.
350The macro expands to the data pointer cast to the pointer of the specified
351.Fa type .
352.Sy Note :
353It is advisable to ensure that there is enough contiguous data in
354.Fa mbuf .
355See
356.Fn m_pullup
357for details.
358.It Fn MGET mbuf how type
359Allocate an
360.Vt mbuf
361and initialize it to contain internal data.
362.Fa mbuf
363will point to the allocated
364.Vt mbuf
365on success, or be set to
366.Dv NULL
367on failure.
368The
369.Fa how
370argument is to be set to
371.Dv M_TRYWAIT
372or
373.Dv M_DONTWAIT .
374It specifies whether the caller is willing to block if necessary.
375If
376.Fa how
377is set to
378.Dv M_TRYWAIT ,
379a failed allocation will result in the caller being put
380to sleep for a designated
381.Va kern.ipc.mbuf_wait
382.Xr ( sysctl 8
383tunable)
384number of ticks.
385A number of other functions and macros related to
386.Vt mbufs
387have the same argument because they may
388at some point need to allocate new
389.Vt mbufs .
390.Pp
391Programmers should be careful not to confuse the
392.Vt mbuf
393allocation flag
394.Dv M_DONTWAIT
395with the
396.Xr malloc 9
397allocation flag,
398.Dv M_NOWAIT .
399They are not the same.
400.It Fn MGETHDR mbuf how type
401Allocate an
402.Vt mbuf
403and initialize it to contain a packet header
404and internal data.
405See
406.Fn MGET
407for details.
408.It Fn MCLGET mbuf how
409Allocate and attach an
410.Vt mbuf cluster
411to
412.Fa mbuf .
413If the macro fails, the
414.Dv M_EXT
415flag will not be set in
416.Fa mbuf .
417.It Fn M_ALIGN mbuf len
418Set the pointer
419.Fa mbuf->m_data
420to place an object of the size
421.Fa len
422at the end of the internal data area of
423.Fa mbuf ,
424long word aligned.
425Applicable only if
426.Fa mbuf
427is newly allocated with
428.Fn MGET
429or
430.Fn m_get .
431.It Fn MH_ALIGN mbuf len
432Serves the same purpose as
433.Fn M_ALIGN
434does, but only for
435.Fa mbuf
436newly allocated with
437.Fn MGETHDR
438or
439.Fn m_gethdr ,
440or initialized by
441.Fn m_dup_pkthdr
442or
443.Fn m_move_pkthdr .
444.It Fn m_align mbuf len
445Serves the same purpose as
446.Fn M_ALIGN
447but handles any type of mbuf.
448.It Fn M_LEADINGSPACE mbuf
449Returns the number of bytes available before the beginning
450of data in
451.Fa mbuf .
452.It Fn M_TRAILINGSPACE mbuf
453Returns the number of bytes available after the end of data in
454.Fa mbuf .
455.It Fn M_PREPEND mbuf len how
456This macro operates on an
457.Vt mbuf chain .
458It is an optimized wrapper for
459.Fn m_prepend
460that can make use of possible empty space before data
461(e.g.\& left after trimming of a link-layer header).
462The new
463.Vt mbuf chain
464pointer or
465.Dv NULL
466is in
467.Fa mbuf
468after the call.
469.It Fn M_MOVE_PKTHDR to from
470Using this macro is equivalent to calling
471.Fn m_move_pkthdr to from .
472.It Fn M_WRITABLE mbuf
473This macro will evaluate true if
474.Fa mbuf
475is not marked
476.Dv M_RDONLY
477and if either
478.Fa mbuf
479does not contain external storage or,
480if it does,
481then if the reference count of the storage is not greater than 1.
482The
483.Dv M_RDONLY
484flag can be set in
485.Fa mbuf->m_flags .
486This can be achieved during setup of the external storage,
487by passing the
488.Dv M_RDONLY
489bit as a
490.Fa flags
491argument to the
492.Fn MEXTADD
493macro, or can be directly set in individual
494.Vt mbufs .
495.It Fn MCHTYPE mbuf type
496Change the type of
497.Fa mbuf
498to
499.Fa type .
500This is a relatively expensive operation and should be avoided.
501.El
502.Pp
503The functions are:
504.Bl -ohang -offset indent
505.It Fn m_get how type
506A function version of
507.Fn MGET
508for non-critical paths.
509.It Fn m_getm orig len how type
510Allocate
511.Fa len
512bytes worth of
513.Vt mbufs
514and
515.Vt mbuf clusters
516if necessary and append the resulting allocated
517.Vt mbuf chain
518to the
519.Vt mbuf chain
520.Fa orig ,
521if it is
522.No non- Ns Dv NULL .
523If the allocation fails at any point,
524free whatever was allocated and return
525.Dv NULL .
526If
527.Fa orig
528is
529.No non- Ns Dv NULL ,
530it will not be freed.
531It is possible to use
532.Fn m_getm
533to either append
534.Fa len
535bytes to an existing
536.Vt mbuf
537or
538.Vt mbuf chain
539(for example, one which may be sitting in a pre-allocated ring)
540or to simply perform an all-or-nothing
541.Vt mbuf
542and
543.Vt mbuf cluster
544allocation.
545.It Fn m_gethdr how type
546A function version of
547.Fn MGETHDR
548for non-critical paths.
549.It Fn m_getcl how type flags
550Fetch an
551.Vt mbuf
552with a
553.Vt mbuf cluster
554attached to it.
555If one of the allocations fails, the entire allocation fails.
556This routine is the preferred way of fetching both the
557.Vt mbuf
558and
559.Vt mbuf cluster
560together, as it avoids having to unlock/relock between allocations.
561Returns
562.Dv NULL
563on failure.
564.It Fn m_getclr how type
565Allocate an
566.Vt mbuf
567and zero out the data region.
568.It Fn m_free mbuf
569Frees
570.Vt mbuf .
571Returns
572.Va m_next
573of the freed
574.Vt mbuf .
575.El
576.Pp
577The functions below operate on
578.Vt mbuf chains .
579.Bl -ohang -offset indent
580.It Fn m_freem mbuf
581Free an entire
582.Vt mbuf chain ,
583including any external storage.
584.\"
585.It Fn m_adj mbuf len
586Trim
587.Fa len
588bytes from the head of an
589.Vt mbuf chain
590if
591.Fa len
592is positive, from the tail otherwise.
593.\"
594.It Fn m_append mbuf len cp
595Append
596.Fa len
597bytes of data
598.Fa cp
599to the
600.Vt mbuf chain .
601Extend the mbuf chain if the new data does not fit in
602existing space.
603.\"
604.It Fn m_prepend mbuf len how
605Allocate a new
606.Vt mbuf
607and prepend it to the
608.Vt mbuf chain ,
609handling
610.Dv M_PKTHDR
611properly.
612.Sy Note :
613It does not allocate any
614.Vt mbuf clusters ,
615so
616.Fa len
617must be less than
618.Dv MLEN
619or
620.Dv MHLEN ,
621depending on the
622.Dv M_PKTHDR
623flag setting.
624.\"
625.It Fn m_copyup mbuf len dstoff
626Similar to
627.Fn m_pullup
628but copies
629.Fa len
630bytes of data into a new mbuf at
631.Fa dstoff
632bytes into the mbuf.
633The
634.Fa dstoff
635argument aligns the data and leaves room for a link layer header.
636Returns the new
637.Vt mbuf chain
638on success,
639and frees the
640.Vt mbuf chain
641and returns
642.Dv NULL
643on failure.
644.Sy Note :
645The function does not allocate
646.Vt mbuf clusters ,
647so
648.Fa len + dstoff
649must be less than
650.Dv MHLEN .
651.\"
652.It Fn m_pullup mbuf len
653Arrange that the first
654.Fa len
655bytes of an
656.Vt mbuf chain
657are contiguous and lie in the data area of
658.Fa mbuf ,
659so they are accessible with
660.Fn mtod mbuf type .
661Return the new
662.Vt mbuf chain
663on success,
664.Dv NULL
665on failure
666(the
667.Vt mbuf chain
668is freed in this case).
669.Sy Note :
670It does not allocate any
671.Vt mbuf clusters ,
672so
673.Fa len
674must be less than
675.Dv MHLEN .
676.\"
677.It Fn m_copym mbuf offset len how
678Make a copy of an
679.Vt mbuf chain
680starting
681.Fa offset
682bytes from the beginning, continuing for
683.Fa len
684bytes.
685If
686.Fa len
687is
688.Dv M_COPYALL ,
689copy to the end of the
690.Vt mbuf chain .
691.Sy Note :
692The copy is read-only, because the
693.Vt mbuf clusters
694are not copied, only their reference counts are incremented.
695.\"
696.It Fn m_copypacket mbuf how
697Copy an entire packet including header, which must be present.
698This is an optimized version of the common case
699.Fn m_copym mbuf 0 M_COPYALL how .
700.Sy Note :
701The copy is read-only, because the
701the copy is read-only, because the
702.Vt mbuf clusters
703are not copied, only their reference counts are incremented.
704.\"
705.It Fn m_dup mbuf how
706Copy a packet header
707.Vt mbuf chain
708into a completely new
709.Vt mbuf chain ,
710including copying any
711.Vt mbuf clusters .
712Use this instead of
713.Fn m_copypacket
714when you need a writable copy of an
715.Vt mbuf chain .
716.\"
717.It Fn m_copydata mbuf offset len buf
718Copy data from an
719.Vt mbuf chain
720starting
721.Fa offset
722bytes from the beginning, continuing for
723.Fa len
724bytes, into the indicated buffer
725.Fa buf .
726.\"
727.It Fn m_copyback mbuf offset len buf
728Copy
729.Fa len
730bytes from the buffer
731.Fa buf
732back into the indicated
733.Vt mbuf chain ,
734starting at
735.Fa offset
736bytes from the beginning of the
737.Vt mbuf chain ,
738extending the
739.Vt mbuf chain
740if necessary.
741.Sy Note :
742It does not allocate any
743.Vt mbuf clusters ,
744just adds
745.Vt mbufs
746to the
747.Vt mbuf chain .
748It is safe to set
749.Fa offset
750beyond the current
751.Vt mbuf chain
752end: zeroed
753.Vt mbufs
754will be allocated to fill the space.
755.\"
756.It Fn m_length mbuf last
757Return the length of the
758.Vt mbuf chain ,
759and optionally a pointer to the last
760.Vt mbuf .
761.\"
762.It Fn m_dup_pkthdr to from how
763Upon the function's completion, the
764.Vt mbuf
765.Fa to
766will contain an identical copy of
767.Fa from->m_pkthdr
768and the per-packet attributes found in the
769.Vt mbuf chain
770.Fa from .
771The
772.Vt mbuf
773.Fa from
774must have the flag
775.Dv M_PKTHDR
776initially set, and
777.Fa to
778must be empty on entry.
779.\"
780.It Fn m_move_pkthdr to from
781Move
782.Va m_pkthdr
783and the per-packet attributes from the
784.Vt mbuf chain
785.Fa from
786to the
787.Vt mbuf
788.Fa to .
789The
790.Vt mbuf
791.Fa from
792must have the flag
793.Dv M_PKTHDR
794initially set, and
795.Fa to
796must be empty on entry.
797Upon the function's completion,
798.Fa from
799will have the flag
800.Dv M_PKTHDR
801and the per-packet attributes cleared.
802.\"
803.It Fn m_fixhdr mbuf
804Set the packet-header length to the length of the
805.Vt mbuf chain .
806.\"
807.It Fn m_devget buf len offset ifp copy
808Copy data from a device local memory pointed to by
809.Fa buf
810to an
811.Vt mbuf chain .
812The copy is done using a specified copy routine
813.Fa copy ,
814or
815.Fn bcopy
816if
817.Fa copy
818is
819.Dv NULL .
820.\"
821.It Fn m_cat m n
822Concatenate
823.Fa n
824to
825.Fa m .
826Both
827.Vt mbuf chains
828must be of the same type.
829.Fa n
830is still valid after the function returns.
831.Sy Note :
832It does not handle
833.Dv M_PKTHDR
834and friends.
835.\"
836.It Fn m_split mbuf len how
837Partition an
838.Vt mbuf chain
839in two pieces, returning the tail:
840all but the first
841.Fa len
842bytes.
843In case of failure, it returns
844.Dv NULL
845and attempts to restore the
846.Vt mbuf chain
847to its original state.
848.\"
849.It Fn m_apply mbuf off len f arg
850Apply a function to an
851.Vt mbuf chain ,
852at offset
853.Fa off ,
854for length
855.Fa len
856bytes.
857Typically used to avoid calls to
858.Fn m_pullup
859which would otherwise be unnecessary or undesirable.
860.Fa arg
861is a convenience argument which is passed to the callback function
862.Fa f .
863.Pp
864Each time
865.Fn f
866is called, it will be passed
867.Fa arg ,
868a pointer to the
869.Fa data
870in the current mbuf, and the length
871.Fa len
872of the data in this mbuf to which the function should be applied.
873.Pp
874The function should return zero to indicate success;
875otherwise, if an error is indicated, then
876.Fn m_apply
877will return the error and stop iterating through the
878.Vt mbuf chain .
879.\"
880.It Fn m_getptr mbuf loc off
881Return a pointer to the mbuf containing the data located at
882.Fa loc
883bytes from the beginning of the
884.Vt mbuf chain .
885The corresponding offset into the mbuf will be stored in
886.Fa *off .
887.It Fn m_defrag m0 how
888Defragment an mbuf chain, returning the shortest possible
889chain of mbufs and clusters.
890If allocation fails and this cannot be completed,
891.Dv NULL
892will be returned and the original chain will be unchanged.
893Upon success, the original chain will be freed and the new
894chain will be returned.
895.Fa how
896should be either
897.Dv M_TRYWAIT
898or
899.Dv M_DONTWAIT ,
900depending on the caller's preference.
901.Pp
902This function is especially useful in network drivers, where
903certain long mbuf chains must be shortened before being added
904to TX descriptor lists.
905.El
906.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
907This section currently applies to TCP/IP only.
908In order to save the host CPU resources, computing checksums is
909offloaded to the network interface hardware if possible.
910The
911.Va m_pkthdr
912member of the leading
913.Vt mbuf
914of a packet contains two fields used for that purpose,
915.Vt int Va csum_flags
916and
917.Vt int Va csum_data .
918The meaning of those fields depends on the direction a packet flows in,
919and on whether the packet is fragmented.
920Henceforth,
921.Va csum_flags
922or
923.Va csum_data
924of a packet
925will denote the corresponding field of the
926.Va m_pkthdr
927member of the leading
928.Vt mbuf
929in the
930.Vt mbuf chain
931containing the packet.
932.Pp
933On output, checksum offloading is attempted after the outgoing
934interface has been determined for a packet.
935The interface-specific field
936.Va ifnet.if_data.ifi_hwassist
937(see
938.Xr ifnet 9 )
939is consulted for the capabilities of the interface to assist in
940computing checksums.
941The
942.Va csum_flags
943field of the packet header is set to indicate which actions the interface
944is supposed to perform on it.
945The actions unsupported by the network interface are done in the
946software prior to passing the packet down to the interface driver;
947such actions will never be requested through
948.Va csum_flags .
949.Pp
950The flags demanding a particular action from an interface are as follows:
951.Bl -tag -width ".Dv CSUM_TCP" -offset indent
952.It Dv CSUM_IP
953The IP header checksum is to be computed and stored in the
954corresponding field of the packet.
955The hardware is expected to know the format of an IP header
956to determine the offset of the IP checksum field.
957.It Dv CSUM_TCP
958The TCP checksum is to be computed.
959(See below.)
960.It Dv CSUM_UDP
961The UDP checksum is to be computed.
962(See below.)
963.El
964.Pp
965Should a TCP or UDP checksum be offloaded to the hardware,
966the field
967.Va csum_data
968will contain the byte offset of the checksum field relative to the
969end of the IP header.
970In this case, the checksum field will be initially
971set by the TCP/IP module to the checksum of the pseudo header
972defined by the TCP and UDP specifications.
973.Pp
974For outbound packets which have been fragmented
975by the host CPU, the following will also be true,
976regardless of the checksum flag settings:
977.Bl -bullet -offset indent
978.It
979all fragments will have the flag
980.Dv M_FRAG
981set in their
982.Va m_flags
983field;
984.It
985the first and the last fragments in the chain will have
986.Dv M_FIRSTFRAG
987or
988.Dv M_LASTFRAG
989set in their
990.Va m_flags ,
991correspondingly;
992.It
993the first fragment in the chain will have the total number
994of fragments contained in its
995.Va csum_data
996field.
997.El
998.Pp
999The last rule for fragmented packets takes precedence over the one
1000for a TCP or UDP checksum.
1001Nevertheless, offloading a TCP or UDP checksum is possible for a
1002fragmented packet if the flag
1003.Dv CSUM_IP_FRAGS
1004is set in the field
1005.Va ifnet.if_data.ifi_hwassist
1006associated with the network interface.
1007However, in this case the interface is expected to figure out
1008the location of the checksum field within the sequence of fragments
1009by itself because
1010.Va csum_data
1011contains a fragment count instead of a checksum offset value.
1012.Pp
1013On input, an interface indicates the actions it has performed
1014on a packet by setting one or more of the following flags in
1015.Va csum_flags
1016associated with the packet:
1017.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1018.It Dv CSUM_IP_CHECKED
1019The IP header checksum has been computed.
1020.It Dv CSUM_IP_VALID
1021The IP header has a valid checksum.
1022This flag can appear only in combination with
1023.Dv CSUM_IP_CHECKED .
1024.It Dv CSUM_DATA_VALID
1025The checksum of the data portion of the IP packet has been computed
1026and stored in the field
1027.Va csum_data
1028in network byte order.
1029.It Dv CSUM_PSEUDO_HDR
1030Can be set only along with
1031.Dv CSUM_DATA_VALID
1032to indicate that the IP data checksum found in
1033.Va csum_data
1034allows for the pseudo header defined by the TCP and UDP specifications.
1035Otherwise the checksum of the pseudo header must be calculated by
1036the host CPU and added to
1037.Va csum_data
1038to obtain the final checksum to be used for TCP or UDP validation purposes.
1039.El
1040.Pp
1041If a particular network interface just indicates success or
1042failure of TCP or UDP checksum validation without returning
1043the exact value of the checksum to the host CPU, its driver can mark
1044.Dv CSUM_DATA_VALID
1045and
1046.Dv CSUM_PSEUDO_HDR
1047in
1048.Va csum_flags ,
1049and set
1050.Va csum_data
1051to
1052.Li 0xFFFF
1053hexadecimal to indicate a valid checksum.
1054It is a peculiarity of the algorithm used that the Internet checksum
1055calculated over any valid packet will be
1056.Li 0xFFFF
1057as long as the original checksum field is included.
1058.Pp
1059For inbound packets which are IP fragments, all
1060.Va csum_data
1061fields will be summed during reassembly to obtain the final checksum
1062value passed to an upper layer in the
1063.Va csum_data
1064field of the reassembled packet.
1065The
1066.Va csum_flags
1067fields of all fragments will be consolidated using logical AND
1068to obtain the final value for
1069.Va csum_flags .
1070Thus, in order to successfully
1071offload checksum computation for fragmented data,
1072all fragments should have the same value of
1073.Va csum_flags .
1074.Sh STRESS TESTING
1075When running a kernel compiled with the option
1076.Dv MBUF_STRESS_TEST ,
1077the following
1078.Xr sysctl 8 Ns
1079-controlled options may be used to create
1080various failure/extreme cases for testing of network drivers
1081and other parts of the kernel that rely on
1082.Vt mbufs .
1083.Bl -tag -width indent
1084.It Va net.inet.ip.mbuf_frag_size
1085Causes
1086.Fn ip_output
1087to fragment outgoing
1088.Vt mbuf chains
1089into fragments of the specified size.
1090Setting this variable to 1 is an excellent way to
1091test the long
1092.Vt mbuf chain
1093handling ability of network drivers.
1094.It Va kern.ipc.m_defragrandomfailures
1095Causes the function
1096.Fn m_defrag
1097to randomly fail, returning
1098.Dv NULL .
1099Any piece of code which uses
1100.Fn m_defrag
1101should be tested with this feature.
1102.El
1103.Sh RETURN VALUES
1104See above.
1105.Sh SEE ALSO
1106.Xr ifnet 9 ,
1107.Xr mbuf_tags 9
1108.Sh HISTORY
1109.\" Please correct me if I'm wrong
1110.Vt Mbufs
1111appeared in an early version of
1112.Bx .
1113Besides being used for network packets, they were used
1114to store various dynamic structures, such as routing table
1115entries, interface addresses, protocol control blocks, etc.
1116.Sh AUTHORS
1117The original
1118.Nm
1119manual page was written by Yar Tikhiy.
1120