xref: /freebsd/share/man/man9/mbuf.9 (revision f856af0466c076beef4ea9b15d088e1119a945b8)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd July 24, 2006
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
55.\"
56.Ss Mbuf utility macros
57.Fn mtod "struct mbuf *mbuf" "type"
58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
60.Ft int
61.Fn M_LEADINGSPACE "struct mbuf *mbuf"
62.Ft int
63.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
67.Ft int
68.Fn M_WRITABLE "struct mbuf *mbuf"
69.\"
70.Ss Mbuf allocation functions
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
75.Ft struct mbuf *
76.Fn m_getcl "int how" "short type" "int flags"
77.Ft struct mbuf *
78.Fn m_getclr "int how" "int type"
79.Ft struct mbuf *
80.Fn m_gethdr "int how" "int type"
81.Ft struct mbuf *
82.Fn m_free "struct mbuf *mbuf"
83.Ft void
84.Fn m_freem "struct mbuf *mbuf"
85.\"
86.Ss Mbuf utility functions
87.Ft void
88.Fn m_adj "struct mbuf *mbuf" "int len"
89.Ft void
90.Fn m_align "struct mbuf *mbuf" "int len"
91.Ft int
92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
101.Ft struct mbuf *
102.Fn m_copypacket "struct mbuf *mbuf" "int how"
103.Ft struct mbuf *
104.Fn m_dup "struct mbuf *mbuf" "int how"
105.Ft void
106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft void
108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft struct mbuf *
110.Fo m_devget
111.Fa "char *buf"
112.Fa "int len"
113.Fa "int offset"
114.Fa "struct ifnet *ifp"
115.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
116.Fc
117.Ft void
118.Fn m_cat "struct mbuf *m" "struct mbuf *n"
119.Ft u_int
120.Fn m_fixhdr "struct mbuf *mbuf"
121.Ft int
122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft u_int
126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
127.Ft struct mbuf *
128.Fn m_split "struct mbuf *mbuf" "int len" "int how"
129.Ft int
130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
131.Ft struct mbuf *
132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
133.Ft struct mbuf *
134.Fn m_defrag "struct mbuf *m0" "int how"
135.Ft struct mbuf *
136.Fn m_unshare "struct mbuf *m0" "int how"
137.\"
138.Sh DESCRIPTION
139An
140.Vt mbuf
141is a basic unit of memory management in the kernel IPC subsystem.
142Network packets and socket buffers are stored in
143.Vt mbufs .
144A network packet may span multiple
145.Vt mbufs
146arranged into a
147.Vt mbuf chain
148(linked list),
149which allows adding or trimming
150network headers with little overhead.
151.Pp
152While a developer should not bother with
153.Vt mbuf
154internals without serious
155reason in order to avoid incompatibilities with future changes, it
156is useful to understand the general structure of an
157.Vt mbuf .
158.Pp
159An
160.Vt mbuf
161consists of a variable-sized header and a small internal
162buffer for data.
163The total size of an
164.Vt mbuf ,
165.Dv MSIZE ,
166is a constant defined in
167.In sys/param.h .
168The
169.Vt mbuf
170header includes:
171.Pp
172.Bl -tag -width "m_nextpkt" -offset indent
173.It Va m_next
174.Pq Vt struct mbuf *
175A pointer to the next
176.Vt mbuf
177in the
178.Vt mbuf chain .
179.It Va m_nextpkt
180.Pq Vt struct mbuf *
181A pointer to the next
182.Vt mbuf chain
183in the queue.
184.It Va m_data
185.Pq Vt caddr_t
186A pointer to data attached to this
187.Vt mbuf .
188.It Va m_len
189.Pq Vt int
190The length of the data.
191.It Va m_type
192.Pq Vt short
193The type of the data.
194.It Va m_flags
195.Pq Vt int
196The
197.Vt mbuf
198flags.
199.El
200.Pp
201The
202.Vt mbuf
203flag bits are defined as follows:
204.Bd -literal
205/* mbuf flags */
206#define	M_EXT		0x0001	/* has associated external storage */
207#define	M_PKTHDR	0x0002	/* start of record */
208#define	M_EOR		0x0004	/* end of record */
209#define	M_RDONLY	0x0008	/* associated data marked read-only */
210#define	M_PROTO1	0x0010	/* protocol-specific */
211#define	M_PROTO2	0x0020 	/* protocol-specific */
212#define	M_PROTO3	0x0040	/* protocol-specific */
213#define	M_PROTO4	0x0080	/* protocol-specific */
214#define	M_PROTO5	0x0100	/* protocol-specific */
215#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
216#define	M_FREELIST	0x8000	/* mbuf is on the free list */
217
218/* mbuf pkthdr flags (also stored in m_flags) */
219#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
220#define	M_MCAST		0x0400	/* send/received as link-level multicast */
221#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
222#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
223#define	M_LASTFRAG	0x2000	/* packet is last fragment */
224.Ed
225.Pp
226The available
227.Vt mbuf
228types are defined as follows:
229.Bd -literal
230/* mbuf types */
231#define	MT_DATA		1	/* dynamic (data) allocation */
232#define	MT_HEADER	MT_DATA	/* packet header */
233#define	MT_SONAME	8	/* socket name */
234#define	MT_CONTROL	14	/* extra-data protocol message */
235#define	MT_OOBDATA	15	/* expedited data */
236.Ed
237.Pp
238If the
239.Dv M_PKTHDR
240flag is set, a
241.Vt struct pkthdr Va m_pkthdr
242is added to the
243.Vt mbuf
244header.
245It contains a pointer to the interface
246the packet has been received from
247.Pq Vt struct ifnet Va *rcvif ,
248and the total packet length
249.Pq Vt int Va len .
250Optionally, it may also contain an attached list of packet tags
251.Pq Vt "struct m_tag" .
252See
253.Xr mbuf_tags 9
254for details.
255Fields used in offloading checksum calculation to the hardware are kept in
256.Va m_pkthdr
257as well.
258See
259.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
260for details.
261.Pp
262If small enough, data is stored in the internal data buffer of an
263.Vt mbuf .
264If the data is sufficiently large, another
265.Vt mbuf
266may be added to the
267.Vt mbuf chain ,
268or external storage may be associated with the
269.Vt mbuf .
270.Dv MHLEN
271bytes of data can fit into an
272.Vt mbuf
273with the
274.Dv M_PKTHDR
275flag set,
276.Dv MLEN
277bytes can otherwise.
278.Pp
279If external storage is being associated with an
280.Vt mbuf ,
281the
282.Va m_ext
283header is added at the cost of losing the internal data buffer.
284It includes a pointer to external storage, the size of the storage,
285a pointer to a function used for freeing the storage,
286a pointer to an optional argument that can be passed to the function,
287and a pointer to a reference counter.
288An
289.Vt mbuf
290using external storage has the
291.Dv M_EXT
292flag set.
293.Pp
294The system supplies a macro for allocating the desired external storage
295buffer,
296.Dv MEXTADD .
297.Pp
298The allocation and management of the reference counter is handled by the
299subsystem.
300.Pp
301The system also supplies a default type of external storage buffer called an
302.Vt mbuf cluster .
303.Vt Mbuf clusters
304can be allocated and configured with the use of the
305.Dv MCLGET
306macro.
307Each
308.Vt mbuf cluster
309is
310.Dv MCLBYTES
311in size, where MCLBYTES is a machine-dependent constant.
312The system defines an advisory macro
313.Dv MINCLSIZE ,
314which is the smallest amount of data to put into an
315.Vt mbuf cluster .
316It is equal to the sum of
317.Dv MLEN
318and
319.Dv MHLEN .
320It is typically preferable to store data into the data region of an
321.Vt mbuf ,
322if size permits, as opposed to allocating a separate
323.Vt mbuf cluster
324to hold the same data.
325.\"
326.Ss Macros and Functions
327There are numerous predefined macros and functions that provide the
328developer with common utilities.
329.\"
330.Bl -ohang -offset indent
331.It Fn mtod mbuf type
332Convert an
333.Fa mbuf
334pointer to a data pointer.
335The macro expands to the data pointer cast to the pointer of the specified
336.Fa type .
337.Sy Note :
338It is advisable to ensure that there is enough contiguous data in
339.Fa mbuf .
340See
341.Fn m_pullup
342for details.
343.It Fn MGET mbuf how type
344Allocate an
345.Vt mbuf
346and initialize it to contain internal data.
347.Fa mbuf
348will point to the allocated
349.Vt mbuf
350on success, or be set to
351.Dv NULL
352on failure.
353The
354.Fa how
355argument is to be set to
356.Dv M_TRYWAIT
357or
358.Dv M_DONTWAIT .
359It specifies whether the caller is willing to block if necessary.
360If
361.Fa how
362is set to
363.Dv M_TRYWAIT ,
364a failed allocation will result in the caller being put
365to sleep for a designated
366.Va kern.ipc.mbuf_wait
367.Xr ( sysctl 8
368tunable)
369number of ticks.
370A number of other functions and macros related to
371.Vt mbufs
372have the same argument because they may
373at some point need to allocate new
374.Vt mbufs .
375.Pp
376Programmers should be careful not to confuse the
377.Vt mbuf
378allocation flag
379.Dv M_DONTWAIT
380with the
381.Xr malloc 9
382allocation flag,
383.Dv M_NOWAIT .
384They are not the same.
385.It Fn MGETHDR mbuf how type
386Allocate an
387.Vt mbuf
388and initialize it to contain a packet header
389and internal data.
390See
391.Fn MGET
392for details.
393.It Fn MCLGET mbuf how
394Allocate and attach an
395.Vt mbuf cluster
396to
397.Fa mbuf .
398If the macro fails, the
399.Dv M_EXT
400flag will not be set in
401.Fa mbuf .
402.It Fn M_ALIGN mbuf len
403Set the pointer
404.Fa mbuf->m_data
405to place an object of the size
406.Fa len
407at the end of the internal data area of
408.Fa mbuf ,
409long word aligned.
410Applicable only if
411.Fa mbuf
412is newly allocated with
413.Fn MGET
414or
415.Fn m_get .
416.It Fn MH_ALIGN mbuf len
417Serves the same purpose as
418.Fn M_ALIGN
419does, but only for
420.Fa mbuf
421newly allocated with
422.Fn MGETHDR
423or
424.Fn m_gethdr ,
425or initialized by
426.Fn m_dup_pkthdr
427or
428.Fn m_move_pkthdr .
429.It Fn m_align mbuf len
430Serves the same purpose as
431.Fn M_ALIGN
432but handles any type of mbuf.
433.It Fn M_LEADINGSPACE mbuf
434Returns the number of bytes available before the beginning
435of data in
436.Fa mbuf .
437.It Fn M_TRAILINGSPACE mbuf
438Returns the number of bytes available after the end of data in
439.Fa mbuf .
440.It Fn M_PREPEND mbuf len how
441This macro operates on an
442.Vt mbuf chain .
443It is an optimized wrapper for
444.Fn m_prepend
445that can make use of possible empty space before data
446(e.g.\& left after trimming of a link-layer header).
447The new
448.Vt mbuf chain
449pointer or
450.Dv NULL
451is in
452.Fa mbuf
453after the call.
454.It Fn M_MOVE_PKTHDR to from
455Using this macro is equivalent to calling
456.Fn m_move_pkthdr to from .
457.It Fn M_WRITABLE mbuf
458This macro will evaluate true if
459.Fa mbuf
460is not marked
461.Dv M_RDONLY
462and if either
463.Fa mbuf
464does not contain external storage or,
465if it does,
466then if the reference count of the storage is not greater than 1.
467The
468.Dv M_RDONLY
469flag can be set in
470.Fa mbuf->m_flags .
471This can be achieved during setup of the external storage,
472by passing the
473.Dv M_RDONLY
474bit as a
475.Fa flags
476argument to the
477.Fn MEXTADD
478macro, or can be directly set in individual
479.Vt mbufs .
480.It Fn MCHTYPE mbuf type
481Change the type of
482.Fa mbuf
483to
484.Fa type .
485This is a relatively expensive operation and should be avoided.
486.El
487.Pp
488The functions are:
489.Bl -ohang -offset indent
490.It Fn m_get how type
491A function version of
492.Fn MGET
493for non-critical paths.
494.It Fn m_getm orig len how type
495Allocate
496.Fa len
497bytes worth of
498.Vt mbufs
499and
500.Vt mbuf clusters
501if necessary and append the resulting allocated
502.Vt mbuf chain
503to the
504.Vt mbuf chain
505.Fa orig ,
506if it is
507.No non- Ns Dv NULL .
508If the allocation fails at any point,
509free whatever was allocated and return
510.Dv NULL .
511If
512.Fa orig
513is
514.No non- Ns Dv NULL ,
515it will not be freed.
516It is possible to use
517.Fn m_getm
518to either append
519.Fa len
520bytes to an existing
521.Vt mbuf
522or
523.Vt mbuf chain
524(for example, one which may be sitting in a pre-allocated ring)
525or to simply perform an all-or-nothing
526.Vt mbuf
527and
528.Vt mbuf cluster
529allocation.
530.It Fn m_gethdr how type
531A function version of
532.Fn MGETHDR
533for non-critical paths.
534.It Fn m_getcl how type flags
535Fetch an
536.Vt mbuf
537with a
538.Vt mbuf cluster
539attached to it.
540If one of the allocations fails, the entire allocation fails.
541This routine is the preferred way of fetching both the
542.Vt mbuf
543and
544.Vt mbuf cluster
545together, as it avoids having to unlock/relock between allocations.
546Returns
547.Dv NULL
548on failure.
549.It Fn m_getclr how type
550Allocate an
551.Vt mbuf
552and zero out the data region.
553.It Fn m_free mbuf
554Frees
555.Vt mbuf .
556Returns
557.Va m_next
558of the freed
559.Vt mbuf .
560.El
561.Pp
562The functions below operate on
563.Vt mbuf chains .
564.Bl -ohang -offset indent
565.It Fn m_freem mbuf
566Free an entire
567.Vt mbuf chain ,
568including any external storage.
569.\"
570.It Fn m_adj mbuf len
571Trim
572.Fa len
573bytes from the head of an
574.Vt mbuf chain
575if
576.Fa len
577is positive, from the tail otherwise.
578.\"
579.It Fn m_append mbuf len cp
580Append
581.Fa len
582bytes of data
583.Fa cp
584to the
585.Vt mbuf chain .
586Extend the mbuf chain if the new data does not fit in
587existing space.
588.\"
589.It Fn m_prepend mbuf len how
590Allocate a new
591.Vt mbuf
592and prepend it to the
593.Vt mbuf chain ,
594handling
595.Dv M_PKTHDR
596properly.
597.Sy Note :
598It does not allocate any
599.Vt mbuf clusters ,
600so
601.Fa len
602must be less than
603.Dv MLEN
604or
605.Dv MHLEN ,
606depending on the
607.Dv M_PKTHDR
608flag setting.
609.\"
610.It Fn m_copyup mbuf len dstoff
611Similar to
612.Fn m_pullup
613but copies
614.Fa len
615bytes of data into a new mbuf at
616.Fa dstoff
617bytes into the mbuf.
618The
619.Fa dstoff
620argument aligns the data and leaves room for a link layer header.
621Returns the new
622.Vt mbuf chain
623on success,
624and frees the
625.Vt mbuf chain
626and returns
627.Dv NULL
628on failure.
629.Sy Note :
630The function does not allocate
631.Vt mbuf clusters ,
632so
633.Fa len + dstoff
634must be less than
635.Dv MHLEN .
636.\"
637.It Fn m_pullup mbuf len
638Arrange that the first
639.Fa len
640bytes of an
641.Vt mbuf chain
642are contiguous and lie in the data area of
643.Fa mbuf ,
644so they are accessible with
645.Fn mtod mbuf type .
646It is important to remember that this may involve
647reallocating some mbufs and moving data so all pointers
648referencing data within the old mbuf chain
649must be recalculated or made invalid.
650Return the new
651.Vt mbuf chain
652on success,
653.Dv NULL
654on failure
655(the
656.Vt mbuf chain
657is freed in this case).
658.Sy Note :
659It does not allocate any
660.Vt mbuf clusters ,
661so
662.Fa len
663must be less than
664.Dv MHLEN .
665.\"
666.It Fn m_copym mbuf offset len how
667Make a copy of an
668.Vt mbuf chain
669starting
670.Fa offset
671bytes from the beginning, continuing for
672.Fa len
673bytes.
674If
675.Fa len
676is
677.Dv M_COPYALL ,
678copy to the end of the
679.Vt mbuf chain .
680.Sy Note :
681The copy is read-only, because the
682.Vt mbuf clusters
683are not copied, only their reference counts are incremented.
684.\"
685.It Fn m_copypacket mbuf how
686Copy an entire packet including header, which must be present.
687This is an optimized version of the common case
688.Fn m_copym mbuf 0 M_COPYALL how .
689.Sy Note :
690the copy is read-only, because the
691.Vt mbuf clusters
692are not copied, only their reference counts are incremented.
693.\"
694.It Fn m_dup mbuf how
695Copy a packet header
696.Vt mbuf chain
697into a completely new
698.Vt mbuf chain ,
699including copying any
700.Vt mbuf clusters .
701Use this instead of
702.Fn m_copypacket
703when you need a writable copy of an
704.Vt mbuf chain .
705.\"
706.It Fn m_copydata mbuf offset len buf
707Copy data from an
708.Vt mbuf chain
709starting
710.Fa offset
711bytes from the beginning, continuing for
712.Fa len
713bytes, into the indicated buffer
714.Fa buf .
715.\"
716.It Fn m_copyback mbuf offset len buf
717Copy
718.Fa len
719bytes from the buffer
720.Fa buf
721back into the indicated
722.Vt mbuf chain ,
723starting at
724.Fa offset
725bytes from the beginning of the
726.Vt mbuf chain ,
727extending the
728.Vt mbuf chain
729if necessary.
730.Sy Note :
731It does not allocate any
732.Vt mbuf clusters ,
733just adds
734.Vt mbufs
735to the
736.Vt mbuf chain .
737It is safe to set
738.Fa offset
739beyond the current
740.Vt mbuf chain
741end: zeroed
742.Vt mbufs
743will be allocated to fill the space.
744.\"
745.It Fn m_length mbuf last
746Return the length of the
747.Vt mbuf chain ,
748and optionally a pointer to the last
749.Vt mbuf .
750.\"
751.It Fn m_dup_pkthdr to from how
752Upon the function's completion, the
753.Vt mbuf
754.Fa to
755will contain an identical copy of
756.Fa from->m_pkthdr
757and the per-packet attributes found in the
758.Vt mbuf chain
759.Fa from .
760The
761.Vt mbuf
762.Fa from
763must have the flag
764.Dv M_PKTHDR
765initially set, and
766.Fa to
767must be empty on entry.
768.\"
769.It Fn m_move_pkthdr to from
770Move
771.Va m_pkthdr
772and the per-packet attributes from the
773.Vt mbuf chain
774.Fa from
775to the
776.Vt mbuf
777.Fa to .
778The
779.Vt mbuf
780.Fa from
781must have the flag
782.Dv M_PKTHDR
783initially set, and
784.Fa to
785must be empty on entry.
786Upon the function's completion,
787.Fa from
788will have the flag
789.Dv M_PKTHDR
790and the per-packet attributes cleared.
791.\"
792.It Fn m_fixhdr mbuf
793Set the packet-header length to the length of the
794.Vt mbuf chain .
795.\"
796.It Fn m_devget buf len offset ifp copy
797Copy data from a device local memory pointed to by
798.Fa buf
799to an
800.Vt mbuf chain .
801The copy is done using a specified copy routine
802.Fa copy ,
803or
804.Fn bcopy
805if
806.Fa copy
807is
808.Dv NULL .
809.\"
810.It Fn m_cat m n
811Concatenate
812.Fa n
813to
814.Fa m .
815Both
816.Vt mbuf chains
817must be of the same type.
818.Fa n
819is still valid after the function returns.
820.Sy Note :
821It does not handle
822.Dv M_PKTHDR
823and friends.
824.\"
825.It Fn m_split mbuf len how
826Partition an
827.Vt mbuf chain
828in two pieces, returning the tail:
829all but the first
830.Fa len
831bytes.
832In case of failure, it returns
833.Dv NULL
834and attempts to restore the
835.Vt mbuf chain
836to its original state.
837.\"
838.It Fn m_apply mbuf off len f arg
839Apply a function to an
840.Vt mbuf chain ,
841at offset
842.Fa off ,
843for length
844.Fa len
845bytes.
846Typically used to avoid calls to
847.Fn m_pullup
848which would otherwise be unnecessary or undesirable.
849.Fa arg
850is a convenience argument which is passed to the callback function
851.Fa f .
852.Pp
853Each time
854.Fn f
855is called, it will be passed
856.Fa arg ,
857a pointer to the
858.Fa data
859in the current mbuf, and the length
860.Fa len
861of the data in this mbuf to which the function should be applied.
862.Pp
863The function should return zero to indicate success;
864otherwise, if an error is indicated, then
865.Fn m_apply
866will return the error and stop iterating through the
867.Vt mbuf chain .
868.\"
869.It Fn m_getptr mbuf loc off
870Return a pointer to the mbuf containing the data located at
871.Fa loc
872bytes from the beginning of the
873.Vt mbuf chain .
874The corresponding offset into the mbuf will be stored in
875.Fa *off .
876.It Fn m_defrag m0 how
877Defragment an mbuf chain, returning the shortest possible
878chain of mbufs and clusters.
879If allocation fails and this cannot be completed,
880.Dv NULL
881will be returned and the original chain will be unchanged.
882Upon success, the original chain will be freed and the new
883chain will be returned.
884.Fa how
885should be either
886.Dv M_TRYWAIT
887or
888.Dv M_DONTWAIT ,
889depending on the caller's preference.
890.Pp
891This function is especially useful in network drivers, where
892certain long mbuf chains must be shortened before being added
893to TX descriptor lists.
894.It Fn m_unshare m0 how
895Create a version of the specified mbuf chain whose
896contents can be safely modified without affecting other users.
897If allocation fails and this operation cannot be completed,
898.Dv NULL
899will be returned.
900The original mbuf chain is always reclaimed and the reference
901count of any shared mbuf clusters is decremented.
902.Fa how
903should be either
904.Dv M_TRYWAIT
905or
906.Dv M_DONTWAIT ,
907depending on the caller's preference.
908As a side-effect of this process the returned
909mbuf chain may be compacted.
910.Pp
911This function is especially useful in the transmit path of
912network code, when data must be encrypted or otherwise
913altered prior to transmission.
914.El
915.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
916This section currently applies to TCP/IP only.
917In order to save the host CPU resources, computing checksums is
918offloaded to the network interface hardware if possible.
919The
920.Va m_pkthdr
921member of the leading
922.Vt mbuf
923of a packet contains two fields used for that purpose,
924.Vt int Va csum_flags
925and
926.Vt int Va csum_data .
927The meaning of those fields depends on the direction a packet flows in,
928and on whether the packet is fragmented.
929Henceforth,
930.Va csum_flags
931or
932.Va csum_data
933of a packet
934will denote the corresponding field of the
935.Va m_pkthdr
936member of the leading
937.Vt mbuf
938in the
939.Vt mbuf chain
940containing the packet.
941.Pp
942On output, checksum offloading is attempted after the outgoing
943interface has been determined for a packet.
944The interface-specific field
945.Va ifnet.if_data.ifi_hwassist
946(see
947.Xr ifnet 9 )
948is consulted for the capabilities of the interface to assist in
949computing checksums.
950The
951.Va csum_flags
952field of the packet header is set to indicate which actions the interface
953is supposed to perform on it.
954The actions unsupported by the network interface are done in the
955software prior to passing the packet down to the interface driver;
956such actions will never be requested through
957.Va csum_flags .
958.Pp
959The flags demanding a particular action from an interface are as follows:
960.Bl -tag -width ".Dv CSUM_TCP" -offset indent
961.It Dv CSUM_IP
962The IP header checksum is to be computed and stored in the
963corresponding field of the packet.
964The hardware is expected to know the format of an IP header
965to determine the offset of the IP checksum field.
966.It Dv CSUM_TCP
967The TCP checksum is to be computed.
968(See below.)
969.It Dv CSUM_UDP
970The UDP checksum is to be computed.
971(See below.)
972.El
973.Pp
974Should a TCP or UDP checksum be offloaded to the hardware,
975the field
976.Va csum_data
977will contain the byte offset of the checksum field relative to the
978end of the IP header.
979In this case, the checksum field will be initially
980set by the TCP/IP module to the checksum of the pseudo header
981defined by the TCP and UDP specifications.
982.Pp
983For outbound packets which have been fragmented
984by the host CPU, the following will also be true,
985regardless of the checksum flag settings:
986.Bl -bullet -offset indent
987.It
988all fragments will have the flag
989.Dv M_FRAG
990set in their
991.Va m_flags
992field;
993.It
994the first and the last fragments in the chain will have
995.Dv M_FIRSTFRAG
996or
997.Dv M_LASTFRAG
998set in their
999.Va m_flags ,
1000correspondingly;
1001.It
1002the first fragment in the chain will have the total number
1003of fragments contained in its
1004.Va csum_data
1005field.
1006.El
1007.Pp
1008The last rule for fragmented packets takes precedence over the one
1009for a TCP or UDP checksum.
1010Nevertheless, offloading a TCP or UDP checksum is possible for a
1011fragmented packet if the flag
1012.Dv CSUM_IP_FRAGS
1013is set in the field
1014.Va ifnet.if_data.ifi_hwassist
1015associated with the network interface.
1016However, in this case the interface is expected to figure out
1017the location of the checksum field within the sequence of fragments
1018by itself because
1019.Va csum_data
1020contains a fragment count instead of a checksum offset value.
1021.Pp
1022On input, an interface indicates the actions it has performed
1023on a packet by setting one or more of the following flags in
1024.Va csum_flags
1025associated with the packet:
1026.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1027.It Dv CSUM_IP_CHECKED
1028The IP header checksum has been computed.
1029.It Dv CSUM_IP_VALID
1030The IP header has a valid checksum.
1031This flag can appear only in combination with
1032.Dv CSUM_IP_CHECKED .
1033.It Dv CSUM_DATA_VALID
1034The checksum of the data portion of the IP packet has been computed
1035and stored in the field
1036.Va csum_data
1037in network byte order.
1038.It Dv CSUM_PSEUDO_HDR
1039Can be set only along with
1040.Dv CSUM_DATA_VALID
1041to indicate that the IP data checksum found in
1042.Va csum_data
1043allows for the pseudo header defined by the TCP and UDP specifications.
1044Otherwise the checksum of the pseudo header must be calculated by
1045the host CPU and added to
1046.Va csum_data
1047to obtain the final checksum to be used for TCP or UDP validation purposes.
1048.El
1049.Pp
1050If a particular network interface just indicates success or
1051failure of TCP or UDP checksum validation without returning
1052the exact value of the checksum to the host CPU, its driver can mark
1053.Dv CSUM_DATA_VALID
1054and
1055.Dv CSUM_PSEUDO_HDR
1056in
1057.Va csum_flags ,
1058and set
1059.Va csum_data
1060to
1061.Li 0xFFFF
1062hexadecimal to indicate a valid checksum.
1063It is a peculiarity of the algorithm used that the Internet checksum
1064calculated over any valid packet will be
1065.Li 0xFFFF
1066as long as the original checksum field is included.
1067.Pp
1068For inbound packets which are IP fragments, all
1069.Va csum_data
1070fields will be summed during reassembly to obtain the final checksum
1071value passed to an upper layer in the
1072.Va csum_data
1073field of the reassembled packet.
1074The
1075.Va csum_flags
1076fields of all fragments will be consolidated using bitwise AND
1077to obtain the final value for
1078.Va csum_flags .
1079Thus, in order to successfully
1080offload checksum computation for fragmented data,
1081all fragments should have the same value of
1082.Va csum_flags .
1083.Sh STRESS TESTING
1084When running a kernel compiled with the option
1085.Dv MBUF_STRESS_TEST ,
1086the following
1087.Xr sysctl 8 Ns
1088-controlled options may be used to create
1089various failure/extreme cases for testing of network drivers
1090and other parts of the kernel that rely on
1091.Vt mbufs .
1092.Bl -tag -width indent
1093.It Va net.inet.ip.mbuf_frag_size
1094Causes
1095.Fn ip_output
1096to fragment outgoing
1097.Vt mbuf chains
1098into fragments of the specified size.
1099Setting this variable to 1 is an excellent way to
1100test the long
1101.Vt mbuf chain
1102handling ability of network drivers.
1103.It Va kern.ipc.m_defragrandomfailures
1104Causes the function
1105.Fn m_defrag
1106to randomly fail, returning
1107.Dv NULL .
1108Any piece of code which uses
1109.Fn m_defrag
1110should be tested with this feature.
1111.El
1112.Sh RETURN VALUES
1113See above.
1114.Sh SEE ALSO
1115.Xr ifnet 9 ,
1116.Xr mbuf_tags 9
1117.Sh HISTORY
1118.\" Please correct me if I'm wrong
1119.Vt Mbufs
1120appeared in an early version of
1121.Bx .
1122Besides being used for network packets, they were used
1123to store various dynamic structures, such as routing table
1124entries, interface addresses, protocol control blocks, etc.
1125.Sh AUTHORS
1126The original
1127.Nm
1128manual page was written by Yar Tikhiy.
1129