xref: /freebsd/share/man/man9/mbuf.9 (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd March 15, 2006
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
55.\"
56.Ss Mbuf utility macros
57.Fn mtod "struct mbuf *mbuf" "type"
58.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
59.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
60.Ft int
61.Fn M_LEADINGSPACE "struct mbuf *mbuf"
62.Ft int
63.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
64.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
65.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
66.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
67.Ft int
68.Fn M_WRITABLE "struct mbuf *mbuf"
69.\"
70.Ss Mbuf allocation functions
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
75.Ft struct mbuf *
76.Fn m_getcl "int how" "short type" "int flags"
77.Ft struct mbuf *
78.Fn m_getclr "int how" "int type"
79.Ft struct mbuf *
80.Fn m_gethdr "int how" "int type"
81.Ft struct mbuf *
82.Fn m_free "struct mbuf *mbuf"
83.Ft void
84.Fn m_freem "struct mbuf *mbuf"
85.\"
86.Ss Mbuf utility functions
87.Ft void
88.Fn m_adj "struct mbuf *mbuf" "int len"
89.Ft void
90.Fn m_align "struct mbuf *mbuf" "int len"
91.Ft int
92.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
93.Ft struct mbuf *
94.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
95.Ft struct mbuf *
96.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
97.Ft struct mbuf *
98.Fn m_pullup "struct mbuf *mbuf" "int len"
99.Ft struct mbuf *
100.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
101.Ft struct mbuf *
102.Fn m_copypacket "struct mbuf *mbuf" "int how"
103.Ft struct mbuf *
104.Fn m_dup "struct mbuf *mbuf" "int how"
105.Ft void
106.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
107.Ft void
108.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft struct mbuf *
110.Fo m_devget
111.Fa "char *buf"
112.Fa "int len"
113.Fa "int offset"
114.Fa "struct ifnet *ifp"
115.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
116.Fc
117.Ft void
118.Fn m_cat "struct mbuf *m" "struct mbuf *n"
119.Ft u_int
120.Fn m_fixhdr "struct mbuf *mbuf"
121.Ft int
122.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft u_int
126.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
127.Ft struct mbuf *
128.Fn m_split "struct mbuf *mbuf" "int len" "int how"
129.Ft int
130.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
131.Ft struct mbuf *
132.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
133.Ft struct mbuf *
134.Fn m_defrag "struct mbuf *m0" "int how"
135.Ft struct mbuf *
136.Fn m_unshare "struct mbuf *m0" "int how"
137.\"
138.Sh DESCRIPTION
139An
140.Vt mbuf
141is a basic unit of memory management in the kernel IPC subsystem.
142Network packets and socket buffers are stored in
143.Vt mbufs .
144A network packet may span multiple
145.Vt mbufs
146arranged into a
147.Vt mbuf chain
148(linked list),
149which allows adding or trimming
150network headers with little overhead.
151.Pp
152While a developer should not bother with
153.Vt mbuf
154internals without serious
155reason in order to avoid incompatibilities with future changes, it
156is useful to understand the general structure of an
157.Vt mbuf .
158.Pp
159An
160.Vt mbuf
161consists of a variable-sized header and a small internal
162buffer for data.
163The total size of an
164.Vt mbuf ,
165.Dv MSIZE ,
166is a constant defined in
167.In sys/param.h .
168The
169.Vt mbuf
170header includes:
171.Pp
172.Bl -tag -width "m_nextpkt" -offset indent
173.It Va m_next
174.Pq Vt struct mbuf *
175A pointer to the next
176.Vt mbuf
177in the
178.Vt mbuf chain .
179.It Va m_nextpkt
180.Pq Vt struct mbuf *
181A pointer to the next
182.Vt mbuf chain
183in the queue.
184.It Va m_data
185.Pq Vt caddr_t
186A pointer to data attached to this
187.Vt mbuf .
188.It Va m_len
189.Pq Vt int
190The length of the data.
191.It Va m_type
192.Pq Vt short
193The type of the data.
194.It Va m_flags
195.Pq Vt int
196The
197.Vt mbuf
198flags.
199.El
200.Pp
201The
202.Vt mbuf
203flag bits are defined as follows:
204.Bd -literal
205/* mbuf flags */
206#define	M_EXT		0x0001	/* has associated external storage */
207#define	M_PKTHDR	0x0002	/* start of record */
208#define	M_EOR		0x0004	/* end of record */
209#define	M_RDONLY	0x0008	/* associated data marked read-only */
210#define	M_PROTO1	0x0010	/* protocol-specific */
211#define	M_PROTO2	0x0020 	/* protocol-specific */
212#define	M_PROTO3	0x0040	/* protocol-specific */
213#define	M_PROTO4	0x0080	/* protocol-specific */
214#define	M_PROTO5	0x0100	/* protocol-specific */
215#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
216#define	M_FREELIST	0x8000	/* mbuf is on the free list */
217
218/* mbuf pkthdr flags (also stored in m_flags) */
219#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
220#define	M_MCAST		0x0400	/* send/received as link-level multicast */
221#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
222#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
223#define	M_LASTFRAG	0x2000	/* packet is last fragment */
224.Ed
225.Pp
226The available
227.Vt mbuf
228types are defined as follows:
229.Bd -literal
230/* mbuf types */
231#define	MT_DATA		1	/* dynamic (data) allocation */
232#define	MT_HEADER	2	/* packet header */
233#define	MT_SONAME	8	/* socket name */
234#define	MT_FTABLE	11	/* fragment reassembly header */
235#define	MT_CONTROL	14	/* extra-data protocol message */
236#define	MT_OOBDATA	15	/* expedited data */
237.Ed
238.Pp
239If the
240.Dv M_PKTHDR
241flag is set, a
242.Vt struct pkthdr Va m_pkthdr
243is added to the
244.Vt mbuf
245header.
246It contains a pointer to the interface
247the packet has been received from
248.Pq Vt struct ifnet Va *rcvif ,
249and the total packet length
250.Pq Vt int Va len .
251Optionally, it may also contain an attached list of packet tags
252.Pq Vt "struct m_tag" .
253See
254.Xr mbuf_tags 9
255for details.
256Fields used in offloading checksum calculation to the hardware are kept in
257.Va m_pkthdr
258as well.
259See
260.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
261for details.
262.Pp
263If small enough, data is stored in the internal data buffer of an
264.Vt mbuf .
265If the data is sufficiently large, another
266.Vt mbuf
267may be added to the
268.Vt mbuf chain ,
269or external storage may be associated with the
270.Vt mbuf .
271.Dv MHLEN
272bytes of data can fit into an
273.Vt mbuf
274with the
275.Dv M_PKTHDR
276flag set,
277.Dv MLEN
278bytes can otherwise.
279.Pp
280If external storage is being associated with an
281.Vt mbuf ,
282the
283.Va m_ext
284header is added at the cost of losing the internal data buffer.
285It includes a pointer to external storage, the size of the storage,
286a pointer to a function used for freeing the storage,
287a pointer to an optional argument that can be passed to the function,
288and a pointer to a reference counter.
289An
290.Vt mbuf
291using external storage has the
292.Dv M_EXT
293flag set.
294.Pp
295The system supplies a macro for allocating the desired external storage
296buffer,
297.Dv MEXTADD .
298.Pp
299The allocation and management of the reference counter is handled by the
300subsystem.
301.Pp
302The system also supplies a default type of external storage buffer called an
303.Vt mbuf cluster .
304.Vt Mbuf clusters
305can be allocated and configured with the use of the
306.Dv MCLGET
307macro.
308Each
309.Vt mbuf cluster
310is
311.Dv MCLBYTES
312in size, where MCLBYTES is a machine-dependent constant.
313The system defines an advisory macro
314.Dv MINCLSIZE ,
315which is the smallest amount of data to put into an
316.Vt mbuf cluster .
317It is equal to the sum of
318.Dv MLEN
319and
320.Dv MHLEN .
321It is typically preferable to store data into the data region of an
322.Vt mbuf ,
323if size permits, as opposed to allocating a separate
324.Vt mbuf cluster
325to hold the same data.
326.\"
327.Ss Macros and Functions
328There are numerous predefined macros and functions that provide the
329developer with common utilities.
330.\"
331.Bl -ohang -offset indent
332.It Fn mtod mbuf type
333Convert an
334.Fa mbuf
335pointer to a data pointer.
336The macro expands to the data pointer cast to the pointer of the specified
337.Fa type .
338.Sy Note :
339It is advisable to ensure that there is enough contiguous data in
340.Fa mbuf .
341See
342.Fn m_pullup
343for details.
344.It Fn MGET mbuf how type
345Allocate an
346.Vt mbuf
347and initialize it to contain internal data.
348.Fa mbuf
349will point to the allocated
350.Vt mbuf
351on success, or be set to
352.Dv NULL
353on failure.
354The
355.Fa how
356argument is to be set to
357.Dv M_TRYWAIT
358or
359.Dv M_DONTWAIT .
360It specifies whether the caller is willing to block if necessary.
361If
362.Fa how
363is set to
364.Dv M_TRYWAIT ,
365a failed allocation will result in the caller being put
366to sleep for a designated
367.Va kern.ipc.mbuf_wait
368.Xr ( sysctl 8
369tunable)
370number of ticks.
371A number of other functions and macros related to
372.Vt mbufs
373have the same argument because they may
374at some point need to allocate new
375.Vt mbufs .
376.Pp
377Programmers should be careful not to confuse the
378.Vt mbuf
379allocation flag
380.Dv M_DONTWAIT
381with the
382.Xr malloc 9
383allocation flag,
384.Dv M_NOWAIT .
385They are not the same.
386.It Fn MGETHDR mbuf how type
387Allocate an
388.Vt mbuf
389and initialize it to contain a packet header
390and internal data.
391See
392.Fn MGET
393for details.
394.It Fn MCLGET mbuf how
395Allocate and attach an
396.Vt mbuf cluster
397to
398.Fa mbuf .
399If the macro fails, the
400.Dv M_EXT
401flag will not be set in
402.Fa mbuf .
403.It Fn M_ALIGN mbuf len
404Set the pointer
405.Fa mbuf->m_data
406to place an object of the size
407.Fa len
408at the end of the internal data area of
409.Fa mbuf ,
410long word aligned.
411Applicable only if
412.Fa mbuf
413is newly allocated with
414.Fn MGET
415or
416.Fn m_get .
417.It Fn MH_ALIGN mbuf len
418Serves the same purpose as
419.Fn M_ALIGN
420does, but only for
421.Fa mbuf
422newly allocated with
423.Fn MGETHDR
424or
425.Fn m_gethdr ,
426or initialized by
427.Fn m_dup_pkthdr
428or
429.Fn m_move_pkthdr .
430.It Fn m_align mbuf len
431Serves the same purpose as
432.Fn M_ALIGN
433but handles any type of mbuf.
434.It Fn M_LEADINGSPACE mbuf
435Returns the number of bytes available before the beginning
436of data in
437.Fa mbuf .
438.It Fn M_TRAILINGSPACE mbuf
439Returns the number of bytes available after the end of data in
440.Fa mbuf .
441.It Fn M_PREPEND mbuf len how
442This macro operates on an
443.Vt mbuf chain .
444It is an optimized wrapper for
445.Fn m_prepend
446that can make use of possible empty space before data
447(e.g.\& left after trimming of a link-layer header).
448The new
449.Vt mbuf chain
450pointer or
451.Dv NULL
452is in
453.Fa mbuf
454after the call.
455.It Fn M_MOVE_PKTHDR to from
456Using this macro is equivalent to calling
457.Fn m_move_pkthdr to from .
458.It Fn M_WRITABLE mbuf
459This macro will evaluate true if
460.Fa mbuf
461is not marked
462.Dv M_RDONLY
463and if either
464.Fa mbuf
465does not contain external storage or,
466if it does,
467then if the reference count of the storage is not greater than 1.
468The
469.Dv M_RDONLY
470flag can be set in
471.Fa mbuf->m_flags .
472This can be achieved during setup of the external storage,
473by passing the
474.Dv M_RDONLY
475bit as a
476.Fa flags
477argument to the
478.Fn MEXTADD
479macro, or can be directly set in individual
480.Vt mbufs .
481.It Fn MCHTYPE mbuf type
482Change the type of
483.Fa mbuf
484to
485.Fa type .
486This is a relatively expensive operation and should be avoided.
487.El
488.Pp
489The functions are:
490.Bl -ohang -offset indent
491.It Fn m_get how type
492A function version of
493.Fn MGET
494for non-critical paths.
495.It Fn m_getm orig len how type
496Allocate
497.Fa len
498bytes worth of
499.Vt mbufs
500and
501.Vt mbuf clusters
502if necessary and append the resulting allocated
503.Vt mbuf chain
504to the
505.Vt mbuf chain
506.Fa orig ,
507if it is
508.No non- Ns Dv NULL .
509If the allocation fails at any point,
510free whatever was allocated and return
511.Dv NULL .
512If
513.Fa orig
514is
515.No non- Ns Dv NULL ,
516it will not be freed.
517It is possible to use
518.Fn m_getm
519to either append
520.Fa len
521bytes to an existing
522.Vt mbuf
523or
524.Vt mbuf chain
525(for example, one which may be sitting in a pre-allocated ring)
526or to simply perform an all-or-nothing
527.Vt mbuf
528and
529.Vt mbuf cluster
530allocation.
531.It Fn m_gethdr how type
532A function version of
533.Fn MGETHDR
534for non-critical paths.
535.It Fn m_getcl how type flags
536Fetch an
537.Vt mbuf
538with a
539.Vt mbuf cluster
540attached to it.
541If one of the allocations fails, the entire allocation fails.
542This routine is the preferred way of fetching both the
543.Vt mbuf
544and
545.Vt mbuf cluster
546together, as it avoids having to unlock/relock between allocations.
547Returns
548.Dv NULL
549on failure.
550.It Fn m_getclr how type
551Allocate an
552.Vt mbuf
553and zero out the data region.
554.It Fn m_free mbuf
555Frees
556.Vt mbuf .
557Returns
558.Va m_next
559of the freed
560.Vt mbuf .
561.El
562.Pp
563The functions below operate on
564.Vt mbuf chains .
565.Bl -ohang -offset indent
566.It Fn m_freem mbuf
567Free an entire
568.Vt mbuf chain ,
569including any external storage.
570.\"
571.It Fn m_adj mbuf len
572Trim
573.Fa len
574bytes from the head of an
575.Vt mbuf chain
576if
577.Fa len
578is positive, from the tail otherwise.
579.\"
580.It Fn m_append mbuf len cp
581Append
582.Fa len
583bytes of data from
584.Fa cp
585to the
586.Vt mbuf chain .
587Extend the mbuf chain if the new data does not fit in
588existing space.
589.\"
590.It Fn m_prepend mbuf len how
591Allocate a new
592.Vt mbuf
593and prepend it to the
594.Vt mbuf chain ,
595handling
596.Dv M_PKTHDR
597properly.
598.Sy Note :
599It does not allocate any
600.Vt mbuf clusters ,
601so
602.Fa len
603must be less than
604.Dv MLEN
605or
606.Dv MHLEN ,
607depending on the
608.Dv M_PKTHDR
609flag setting.
610.\"
611.It Fn m_copyup mbuf len dstoff
612Similar to
613.Fn m_pullup
614but copies
615.Fa len
616bytes of data into a new mbuf at
617.Fa dstoff
618bytes into the mbuf.
619The
620.Fa dstoff
621argument aligns the data and leaves room for a link layer header.
622Returns the new
623.Vt mbuf chain
624on success,
625and frees the
626.Vt mbuf chain
627and returns
628.Dv NULL
629on failure.
630.Sy Note :
631The function does not allocate
632.Vt mbuf clusters ,
633so
634.Fa len + dstoff
635must be less than
636.Dv MHLEN .
637.\"
638.It Fn m_pullup mbuf len
639Arrange that the first
640.Fa len
641bytes of an
642.Vt mbuf chain
643are contiguous and lie in the data area of
644.Fa mbuf ,
645so they are accessible with
646.Fn mtod mbuf type .
647Return the new
648.Vt mbuf chain
649on success,
650.Dv NULL
651on failure
652(the
653.Vt mbuf chain
654is freed in this case).
655.Sy Note :
656It does not allocate any
657.Vt mbuf clusters ,
658so
659.Fa len
660must be less than
661.Dv MHLEN .
662.\"
663.It Fn m_copym mbuf offset len how
664Make a copy of an
665.Vt mbuf chain
666starting
667.Fa offset
668bytes from the beginning, continuing for
669.Fa len
670bytes.
671If
672.Fa len
673is
674.Dv M_COPYALL ,
675copy to the end of the
676.Vt mbuf chain .
677.Sy Note :
678The copy is read-only, because the
679.Vt mbuf clusters
680are not copied, only their reference counts are incremented.
681.\"
682.It Fn m_copypacket mbuf how
683Copy an entire packet including header, which must be present.
684This is an optimized version of the common case
685.Fn m_copym mbuf 0 M_COPYALL how .
686.Sy Note :
687the copy is read-only, because the
688.Vt mbuf clusters
689are not copied, only their reference counts are incremented.
690.\"
691.It Fn m_dup mbuf how
692Copy a packet header
693.Vt mbuf chain
694into a completely new
695.Vt mbuf chain ,
696including copying any
697.Vt mbuf clusters .
698Use this instead of
699.Fn m_copypacket
700when you need a writable copy of an
701.Vt mbuf chain .
702.\"
703.It Fn m_copydata mbuf offset len buf
704Copy data from an
705.Vt mbuf chain
706starting
707.Fa offset
708bytes from the beginning, continuing for
709.Fa len
710bytes, into the indicated buffer
711.Fa buf .
712.\"
713.It Fn m_copyback mbuf offset len buf
714Copy
715.Fa len
716bytes from the buffer
717.Fa buf
718back into the indicated
719.Vt mbuf chain ,
720starting at
721.Fa offset
722bytes from the beginning of the
723.Vt mbuf chain ,
724extending the
725.Vt mbuf chain
726if necessary.
727.Sy Note :
728It does not allocate any
729.Vt mbuf clusters ,
730just adds
731.Vt mbufs
732to the
733.Vt mbuf chain .
734It is safe to set
735.Fa offset
736beyond the current
737.Vt mbuf chain
738end: zeroed
739.Vt mbufs
740will be allocated to fill the space.
741.\"
742.It Fn m_length mbuf last
743Return the length of the
744.Vt mbuf chain ,
745and optionally a pointer to the last
746.Vt mbuf .
747.\"
748.It Fn m_dup_pkthdr to from how
749Upon the function's completion, the
750.Vt mbuf
751.Fa to
752will contain an identical copy of
753.Fa from->m_pkthdr
754and the per-packet attributes found in the
755.Vt mbuf chain
756.Fa from .
757The
758.Vt mbuf
759.Fa from
760must have the flag
761.Dv M_PKTHDR
762initially set, and
763.Fa to
764must be empty on entry.
765.\"
766.It Fn m_move_pkthdr to from
767Move
768.Va m_pkthdr
769and the per-packet attributes from the
770.Vt mbuf chain
771.Fa from
772to the
773.Vt mbuf
774.Fa to .
775The
776.Vt mbuf
777.Fa from
778must have the flag
779.Dv M_PKTHDR
780initially set, and
781.Fa to
782must be empty on entry.
783Upon the function's completion,
784.Fa from
785will have the flag
786.Dv M_PKTHDR
787and the per-packet attributes cleared.
788.\"
789.It Fn m_fixhdr mbuf
790Set the packet-header length to the length of the
791.Vt mbuf chain .
792.\"
793.It Fn m_devget buf len offset ifp copy
794Copy data from a device local memory pointed to by
795.Fa buf
796to an
797.Vt mbuf chain .
798The copy is done using a specified copy routine
799.Fa copy ,
800or
801.Fn bcopy
802if
803.Fa copy
804is
805.Dv NULL .
806.\"
807.It Fn m_cat m n
808Concatenate
809.Fa n
810to
811.Fa m .
812Both
813.Vt mbuf chains
814must be of the same type.
815.Fa N
816is still valid after the function returned.
817.Sy Note :
818It does not handle
819.Dv M_PKTHDR
820and friends.
821.\"
822.It Fn m_split mbuf len how
823Partition an
824.Vt mbuf chain
825in two pieces, returning the tail:
826all but the first
827.Fa len
828bytes.
829In case of failure, it returns
830.Dv NULL
831and attempts to restore the
832.Vt mbuf chain
833to its original state.
834.\"
835.It Fn m_apply mbuf off len f arg
836Apply a function to an
837.Vt mbuf chain ,
838at offset
839.Fa off ,
840for length
841.Fa len
842bytes.
843Typically used to avoid calls to
844.Fn m_pullup
845which would otherwise be unnecessary or undesirable.
846.Fa arg
847is a convenience argument which is passed to the callback function
848.Fa f .
849.Pp
850Each time
851.Fn f
852is called, it will be passed
853.Fa arg ,
854a pointer to the
855.Fa data
856in the current mbuf, and the length
857.Fa len
858of the data in this mbuf to which the function should be applied.
859.Pp
860The function should return zero to indicate success;
861otherwise, if an error is indicated, then
862.Fn m_apply
863will return the error and stop iterating through the
864.Vt mbuf chain .
865.\"
866.It Fn m_getptr mbuf loc off
867Return a pointer to the mbuf containing the data located at
868.Fa loc
869bytes from the beginning of the
870.Vt mbuf chain .
871The corresponding offset into the mbuf will be stored in
872.Fa *off .
873.It Fn m_defrag m0 how
874Defragment an mbuf chain, returning the shortest possible
875chain of mbufs and clusters.
876If allocation fails and this cannot be completed,
877.Dv NULL
878will be returned and the original chain will be unchanged.
879Upon success, the original chain will be freed and the new
880chain will be returned.
881.Fa how
882should be either
883.Dv M_TRYWAIT
884or
885.Dv M_DONTWAIT ,
886depending on the caller's preference.
887.Pp
888This function is especially useful in network drivers, where
889certain long mbuf chains must be shortened before being added
890to TX descriptor lists.
891.It Fn m_unshare m0 how
892Create a version of the specified mbuf chain whose
893contents can be safely modified without affecting other users.
894If allocation fails and this operation cannot be completed,
895.Dv NULL
896will be returned.
897The original mbuf chain is always reclaimed and the reference
898count of any shared mbuf clusters is decremented.
899.Fa how
900should be either
901.Dv M_TRYWAIT
902or
903.Dv M_DONTWAIT ,
904depending on the caller's preference.
905As a side-effect of this process the returned
906mbuf chain may be compacted.
907.Pp
908This function is especially useful in the transmit path of
909network code, when data must be encrypted or otherwise
910altered prior to transmission.
911.El
912.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
913This section currently applies to TCP/IP only.
914In order to save the host CPU resources, computing checksums is
915offloaded to the network interface hardware if possible.
916The
917.Va m_pkthdr
918member of the leading
919.Vt mbuf
920of a packet contains two fields used for that purpose,
921.Vt int Va csum_flags
922and
923.Vt int Va csum_data .
924The meaning of those fields depends on the direction a packet flows in,
925and on whether the packet is fragmented.
926Henceforth,
927.Va csum_flags
928or
929.Va csum_data
930of a packet
931will denote the corresponding field of the
932.Va m_pkthdr
933member of the leading
934.Vt mbuf
935in the
936.Vt mbuf chain
937containing the packet.
938.Pp
939On output, checksum offloading is attempted after the outgoing
940interface has been determined for a packet.
941The interface-specific field
942.Va ifnet.if_data.ifi_hwassist
943(see
944.Xr ifnet 9 )
945is consulted for the capabilities of the interface to assist in
946computing checksums.
947The
948.Va csum_flags
949field of the packet header is set to indicate which actions the interface
950is supposed to perform on it.
951The actions unsupported by the network interface are done in the
952software prior to passing the packet down to the interface driver;
953such actions will never be requested through
954.Va csum_flags .
955.Pp
956The flags demanding a particular action from an interface are as follows:
957.Bl -tag -width ".Dv CSUM_TCP" -offset indent
958.It Dv CSUM_IP
959The IP header checksum is to be computed and stored in the
960corresponding field of the packet.
961The hardware is expected to know the format of an IP header
962to determine the offset of the IP checksum field.
963.It Dv CSUM_TCP
964The TCP checksum is to be computed.
965(See below.)
966.It Dv CSUM_UDP
967The UDP checksum is to be computed.
968(See below.)
969.El
970.Pp
971Should a TCP or UDP checksum be offloaded to the hardware,
972the field
973.Va csum_data
974will contain the byte offset of the checksum field relative to the
975end of the IP header.
976In this case, the checksum field will be initially
977set by the TCP/IP module to the checksum of the pseudo header
978defined by the TCP and UDP specifications.
979.Pp
980For outbound packets which have been fragmented
981by the host CPU, the following will also be true,
982regardless of the checksum flag settings:
983.Bl -bullet -offset indent
984.It
985all fragments will have the flag
986.Dv M_FRAG
987set in their
988.Va m_flags
989field;
990.It
991the first and the last fragments in the chain will have
992.Dv M_FIRSTFRAG
993or
994.Dv M_LASTFRAG
995set in their
996.Va m_flags ,
997respectively;
998.It
999the first fragment in the chain will have the total number
1000of fragments contained in its
1001.Va csum_data
1002field.
1003.El
1004.Pp
1005The last rule for fragmented packets takes precedence over the one
1006for a TCP or UDP checksum.
1007Nevertheless, offloading a TCP or UDP checksum is possible for a
1008fragmented packet if the flag
1009.Dv CSUM_IP_FRAGS
1010is set in the field
1011.Va ifnet.if_data.ifi_hwassist
1012associated with the network interface.
1013However, in this case the interface is expected to figure out
1014the location of the checksum field within the sequence of fragments
1015by itself because
1016.Va csum_data
1017contains a fragment count instead of a checksum offset value.
1018.Pp
1019On input, an interface indicates the actions it has performed
1020on a packet by setting one or more of the following flags in
1021.Va csum_flags
1022associated with the packet:
1023.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1024.It Dv CSUM_IP_CHECKED
1025The IP header checksum has been computed.
1026.It Dv CSUM_IP_VALID
1027The IP header has a valid checksum.
1028This flag can appear only in combination with
1029.Dv CSUM_IP_CHECKED .
1030.It Dv CSUM_DATA_VALID
1031The checksum of the data portion of the IP packet has been computed
1032and stored in the field
1033.Va csum_data
1034in network byte order.
1035.It Dv CSUM_PSEUDO_HDR
1036Can be set only along with
1037.Dv CSUM_DATA_VALID
1038to indicate that the IP data checksum found in
1039.Va csum_data
1040allows for the pseudo header defined by the TCP and UDP specifications.
1041Otherwise the checksum of the pseudo header must be calculated by
1042the host CPU and added to
1043.Va csum_data
1044to obtain the final checksum to be used for TCP or UDP validation purposes.
1045.El
1046.Pp
1047If a particular network interface just indicates success or
1048failure of TCP or UDP checksum validation without returning
1049the exact value of the checksum to the host CPU, its driver can mark
1050.Dv CSUM_DATA_VALID
1051and
1052.Dv CSUM_PSEUDO_HDR
1053in
1054.Va csum_flags ,
1055and set
1056.Va csum_data
1057to
1058.Li 0xFFFF
1059hexadecimal to indicate a valid checksum.
1060It is a peculiarity of the algorithm used that the Internet checksum
1061calculated over any valid packet will be
1062.Li 0xFFFF
1063as long as the original checksum field is included.
1064.Pp
1065For inbound packets which are IP fragments, all
1066.Va csum_data
1067fields will be summed during reassembly to obtain the final checksum
1068value passed to an upper layer in the
1069.Va csum_data
1070field of the reassembled packet.
1071The
1072.Va csum_flags
1073fields of all fragments will be consolidated using logical AND
1074to obtain the final value for
1075.Va csum_flags .
1076Thus, in order to successfully
1077offload checksum computation for fragmented data,
1078all fragments should have the same value of
1079.Va csum_flags .
1080.Sh STRESS TESTING
1081When running a kernel compiled with the option
1082.Dv MBUF_STRESS_TEST ,
1083the following
1084.Xr sysctl 8 Ns
1085-controlled options may be used to create
1086various failure/extreme cases for testing of network drivers
1087and other parts of the kernel that rely on
1088.Vt mbufs .
1089.Bl -tag -width indent
1090.It Va net.inet.ip.mbuf_frag_size
1091Causes
1092.Fn ip_output
1093to fragment outgoing
1094.Vt mbuf chains
1095into fragments of the specified size.
1096Setting this variable to 1 is an excellent way to
1097test the long
1098.Vt mbuf chain
1099handling ability of network drivers.
1100.It Va kern.ipc.m_defragrandomfailures
1101Causes the function
1102.Fn m_defrag
1103to randomly fail, returning
1104.Dv NULL .
1105Any piece of code which uses
1106.Fn m_defrag
1107should be tested with this feature.
1108.El
1109.Sh RETURN VALUES
1110See above.
1111.Sh SEE ALSO
1112.Xr ifnet 9 ,
1113.Xr mbuf_tags 9
1114.Sh HISTORY
1115.\" Please correct me if I'm wrong
1116.Vt Mbufs
1117appeared in an early version of
1118.Bx .
1119Besides being used for network packets, they were used
1120to store various dynamic structures, such as routing table
1121entries, interface addresses, protocol control blocks, etc.
1122.Sh AUTHORS
1123The original
1124.Nm
1125manual page was written by Yar Tikhiy.
1126