xref: /freebsd/share/man/man9/mbuf.9 (revision 7afc53b8dfcc7d5897920ce6cc7e842fbb4ab813)
1.\" Copyright (c) 2000 FreeBSD Inc.
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL [your name] OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd August 27, 2004
28.Dt MBUF 9
29.Os
30.\"
31.Sh NAME
32.Nm mbuf
33.Nd "memory management in the kernel IPC subsystem"
34.\"
35.Sh SYNOPSIS
36.In sys/param.h
37.In sys/systm.h
38.In sys/mbuf.h
39.\"
40.Ss Mbuf allocation macros
41.Fn MGET "struct mbuf *mbuf" "int how" "short type"
42.Fn MGETHDR "struct mbuf *mbuf" "int how" "short type"
43.Fn MCLGET "struct mbuf *mbuf" "int how"
44.Fo MEXTADD
45.Fa "struct mbuf *mbuf"
46.Fa "caddr_t buf"
47.Fa "u_int size"
48.Fa "void (*free)(void *opt_args)"
49.Fa "void *opt_args"
50.Fa "short flags"
51.Fa "int type"
52.Fc
53.Fn MEXTFREE "struct mbuf *mbuf"
54.Fn MEXT_ADD_REF "struct mbuf *mbuf"
55.Fn MEXT_REM_REF "struct mbuf *mbuf"
56.Fn MFREE "struct mbuf *mbuf" "struct mbuf *successor"
57.\"
58.Ss Mbuf utility macros
59.Fn mtod "struct mbuf *mbuf" "type"
60.Ft int
61.Fn MEXT_IS_REF "struct mbuf *mbuf"
62.Fn M_ALIGN "struct mbuf *mbuf" "u_int len"
63.Fn MH_ALIGN "struct mbuf *mbuf" "u_int len"
64.Ft int
65.Fn M_LEADINGSPACE "struct mbuf *mbuf"
66.Ft int
67.Fn M_TRAILINGSPACE "struct mbuf *mbuf"
68.Fn M_MOVE_PKTHDR "struct mbuf *to" "struct mbuf *from"
69.Fn M_PREPEND "struct mbuf *mbuf" "int len" "int how"
70.Fn MCHTYPE "struct mbuf *mbuf" "u_int type"
71.Ft int
72.Fn M_WRITABLE "struct mbuf *mbuf"
73.\"
74.Ss Mbuf allocation functions
75.Ft struct mbuf *
76.Fn m_get "int how" "int type"
77.Ft struct mbuf *
78.Fn m_getm "struct mbuf *orig" "int len" "int how" "int type"
79.Ft struct mbuf *
80.Fn m_getcl "int how" "short type" "int flags"
81.Ft struct mbuf *
82.Fn m_getclr "int how" "int type"
83.Ft struct mbuf *
84.Fn m_gethdr "int how" "int type"
85.Ft struct mbuf *
86.Fn m_free "struct mbuf *mbuf"
87.Ft void
88.Fn m_freem "struct mbuf *mbuf"
89.\"
90.Ss Mbuf utility functions
91.Ft void
92.Fn m_adj "struct mbuf *mbuf" "int len"
93.Ft int
94.Fn m_append "struct mbuf *mbuf" "int len" "c_caddr_t cp"
95.Ft struct mbuf *
96.Fn m_prepend "struct mbuf *mbuf" "int len" "int how"
97.Ft struct mbuf *
98.Fn m_copyup "struct mbuf *mbuf" "int len" "int dstoff"
99.Ft struct mbuf *
100.Fn m_pullup "struct mbuf *mbuf" "int len"
101.Ft struct mbuf *
102.Fn m_copym "struct mbuf *mbuf" "int offset" "int len" "int how"
103.Ft struct mbuf *
104.Fn m_copypacket "struct mbuf *mbuf" "int how"
105.Ft struct mbuf *
106.Fn m_dup "struct mbuf *mbuf" "int how"
107.Ft void
108.Fn m_copydata "const struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
109.Ft void
110.Fn m_copyback "struct mbuf *mbuf" "int offset" "int len" "caddr_t buf"
111.Ft struct mbuf *
112.Fo m_devget
113.Fa "char *buf"
114.Fa "int len"
115.Fa "int offset"
116.Fa "struct ifnet *ifp"
117.Fa "void (*copy)(char *from, caddr_t to, u_int len)"
118.Fc
119.Ft void
120.Fn m_cat "struct mbuf *m" "struct mbuf *n"
121.Ft u_int
122.Fn m_fixhdr "struct mbuf *mbuf"
123.Ft int
124.Fn m_dup_pkthdr "struct mbuf *to" "struct mbuf *from" "int how"
125.Ft void
126.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
127.Ft u_int
128.Fn m_length "struct mbuf *mbuf" "struct mbuf **last"
129.Ft struct mbuf *
130.Fn m_split "struct mbuf *mbuf" "int len" "int how"
131.Ft int
132.Fn m_apply "struct mbuf *mbuf" "int off" "int len" "int (*f)(void *arg, void *data, u_int len)" "void *arg"
133.Ft struct mbuf *
134.Fn m_getptr "struct mbuf *mbuf" "int loc" "int *off"
135.Ft struct mbuf *
136.Fn m_defrag "struct mbuf *m0" "int how"
137.\"
138.Sh DESCRIPTION
139An
140.Vt mbuf
141is a basic unit of memory management in the kernel IPC subsystem.
142Network packets and socket buffers are stored in
143.Vt mbufs .
144A network packet may span multiple
145.Vt mbufs
146arranged into a
147.Vt mbuf chain
148(linked list),
149which allows adding or trimming
150network headers with little overhead.
151.Pp
152While a developer should not bother with
153.Vt mbuf
154internals without serious
155reason in order to avoid incompatibilities with future changes, it
156is useful to understand the general structure of an
157.Vt mbuf .
158.Pp
159An
160.Vt mbuf
161consists of a variable-sized header and a small internal
162buffer for data.
163The total size of an
164.Vt mbuf ,
165.Dv MSIZE ,
166is a constant defined in
167.In sys/param.h .
168The
169.Vt mbuf
170header includes:
171.Pp
172.Bl -tag -width "m_nextpkt" -offset indent
173.It Va m_next
174.Pq Vt struct mbuf *
175A pointer to the next
176.Vt mbuf
177in the
178.Vt mbuf chain .
179.It Va m_nextpkt
180.Pq Vt struct mbuf *
181A pointer to the next
182.Vt mbuf chain
183in the queue.
184.It Va m_data
185.Pq Vt caddr_t
186A pointer to data attached to this
187.Vt mbuf .
188.It Va m_len
189.Pq Vt int
190The length of the data.
191.It Va m_type
192.Pq Vt short
193The type of the data.
194.It Va m_flags
195.Pq Vt int
196The
197.Vt mbuf
198flags.
199.El
200.Pp
201The
202.Vt mbuf
203flag bits are defined as follows:
204.Bd -literal
205/* mbuf flags */
206#define	M_EXT		0x0001	/* has associated external storage */
207#define	M_PKTHDR	0x0002	/* start of record */
208#define	M_EOR		0x0004	/* end of record */
209#define	M_RDONLY	0x0008	/* associated data marked read-only */
210#define	M_PROTO1	0x0010	/* protocol-specific */
211#define	M_PROTO2	0x0020 	/* protocol-specific */
212#define	M_PROTO3	0x0040	/* protocol-specific */
213#define	M_PROTO4	0x0080	/* protocol-specific */
214#define	M_PROTO5	0x0100	/* protocol-specific */
215#define	M_PROTO6	0x4000	/* protocol-specific (avoid M_BCAST conflict) */
216#define	M_FREELIST	0x8000	/* mbuf is on the free list */
217
218/* mbuf pkthdr flags (also stored in m_flags) */
219#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
220#define	M_MCAST		0x0400	/* send/received as link-level multicast */
221#define	M_FRAG		0x0800	/* packet is fragment of larger packet */
222#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
223#define	M_LASTFRAG	0x2000	/* packet is last fragment */
224.Ed
225.Pp
226The available
227.Vt mbuf
228types are defined as follows:
229.Bd -literal
230/* mbuf types */
231#define	MT_DATA		1	/* dynamic (data) allocation */
232#define	MT_HEADER	2	/* packet header */
233#define	MT_SONAME	8	/* socket name */
234#define	MT_FTABLE	11	/* fragment reassembly header */
235#define	MT_CONTROL	14	/* extra-data protocol message */
236#define	MT_OOBDATA	15	/* expedited data */
237.Ed
238.Pp
239If the
240.Dv M_PKTHDR
241flag is set, a
242.Vt struct pkthdr Va m_pkthdr
243is added to the
244.Vt mbuf
245header.
246It contains a pointer to the interface
247the packet has been received from
248.Pq Vt struct ifnet Va *rcvif ,
249and the total packet length
250.Pq Vt int Va len .
251Optionally, it may also contain an attached list of packet tags
252.Pq Vt "struct m_tag" .
253See
254.Xr mbuf_tags 9
255for details.
256Fields used in offloading checksum calculation to the hardware are kept in
257.Va m_pkthdr
258as well.
259See
260.Sx HARDWARE-ASSISTED CHECKSUM CALCULATION
261for details.
262.Pp
263If small enough, data is stored in the internal data buffer of an
264.Vt mbuf .
265If the data is sufficiently large, another
266.Vt mbuf
267may be added to the
268.Vt mbuf chain ,
269or external storage may be associated with the
270.Vt mbuf .
271.Dv MHLEN
272bytes of data can fit into an
273.Vt mbuf
274with the
275.Dv M_PKTHDR
276flag set,
277.Dv MLEN
278bytes can otherwise.
279.Pp
280If external storage is being associated with an
281.Vt mbuf ,
282the
283.Va m_ext
284header is added at the cost of losing the internal data buffer.
285It includes a pointer to external storage, the size of the storage,
286a pointer to a function used for freeing the storage,
287a pointer to an optional argument that can be passed to the function,
288and a pointer to a reference counter.
289An
290.Vt mbuf
291using external storage has the
292.Dv M_EXT
293flag set.
294.Pp
295The system supplies a macro for allocating the desired external storage
296buffer,
297.Dv MEXTADD .
298.Pp
299The allocation and management of the reference counter is handled by the
300subsystem.
301The developer can check whether the reference count for the
302external storage of a given
303.Vt mbuf
304is greater than 1 with the
305.Dv MEXT_IS_REF
306macro.
307Similarly, the developer can directly add and remove references,
308if absolutely necessary, with the use of the
309.Dv MEXT_ADD_REF
310and
311.Dv MEXT_REM_REF
312macros.
313.Pp
314The system also supplies a default type of external storage buffer called an
315.Vt mbuf cluster .
316.Vt Mbuf clusters
317can be allocated and configured with the use of the
318.Dv MCLGET
319macro.
320Each
321.Vt mbuf cluster
322is
323.Dv MCLBYTES
324in size, where MCLBYTES is a machine-dependent constant.
325The system defines an advisory macro
326.Dv MINCLSIZE ,
327which is the smallest amount of data to put into an
328.Vt mbuf cluster .
329It is equal to the sum of
330.Dv MLEN
331and
332.Dv MHLEN .
333It is typically preferable to store data into the data region of an
334.Vt mbuf ,
335if size permits, as opposed to allocating a separate
336.Vt mbuf cluster
337to hold the same data.
338.\"
339.Ss Macros and Functions
340There are numerous predefined macros and functions that provide the
341developer with common utilities.
342.\"
343.Bl -ohang -offset indent
344.It Fn mtod mbuf type
345Convert an
346.Fa mbuf
347pointer to a data pointer.
348The macro expands to the data pointer cast to the pointer of the specified
349.Fa type .
350.Sy Note :
351It is advisable to ensure that there is enough contiguous data in
352.Fa mbuf .
353See
354.Fn m_pullup
355for details.
356.It Fn MGET mbuf how type
357Allocate an
358.Vt mbuf
359and initialize it to contain internal data.
360.Fa mbuf
361will point to the allocated
362.Vt mbuf
363on success, or be set to
364.Dv NULL
365on failure.
366The
367.Fa how
368argument is to be set to
369.Dv M_TRYWAIT
370or
371.Dv M_DONTWAIT .
372It specifies whether the caller is willing to block if necessary.
373If
374.Fa how
375is set to
376.Dv M_TRYWAIT ,
377a failed allocation will result in the caller being put
378to sleep for a designated
379.Va kern.ipc.mbuf_wait
380.Xr ( sysctl 8
381tunable)
382number of ticks.
383A number of other functions and macros related to
384.Vt mbufs
385have the same argument because they may
386at some point need to allocate new
387.Vt mbufs .
388.Pp
389Programmers should be careful not to confuse the
390.Vt mbuf
391allocation flag
392.Dv M_DONTWAIT
393with the
394.Xr malloc 9
395allocation flag,
396.Dv M_NOWAIT .
397They are not the same.
398.It Fn MGETHDR mbuf how type
399Allocate an
400.Vt mbuf
401and initialize it to contain a packet header
402and internal data.
403See
404.Fn MGET
405for details.
406.It Fn MCLGET mbuf how
407Allocate and attach an
408.Vt mbuf cluster
409to
410.Fa mbuf .
411If the macro fails, the
412.Dv M_EXT
413flag will not be set in
414.Fa mbuf .
415.It Fn M_ALIGN mbuf len
416Set the pointer
417.Fa mbuf->m_data
418to place an object of the size
419.Fa len
420at the end of the internal data area of
421.Fa mbuf ,
422long word aligned.
423Applicable only if
424.Fa mbuf
425is newly allocated with
426.Fn MGET
427or
428.Fn m_get .
429.It Fn MH_ALIGN mbuf len
430Serves the same purpose as
431.Fn M_ALIGN
432does, but only for
433.Fa mbuf
434newly allocated with
435.Fn MGETHDR
436or
437.Fn m_gethdr ,
438or initialized by
439.Fn m_dup_pkthdr
440or
441.Fn m_move_pkthdr .
442.It Fn M_LEADINGSPACE mbuf
443Returns the number of bytes available before the beginning
444of data in
445.Fa mbuf .
446.It Fn M_TRAILINGSPACE mbuf
447Returns the number of bytes available after the end of data in
448.Fa mbuf .
449.It Fn M_PREPEND mbuf len how
450This macro operates on an
451.Vt mbuf chain .
452It is an optimized wrapper for
453.Fn m_prepend
454that can make use of possible empty space before data
455(e.g.\& left after trimming of a link-layer header).
456The new
457.Vt mbuf chain
458pointer or
459.Dv NULL
460is in
461.Fa mbuf
462after the call.
463.It Fn M_MOVE_PKTHDR to from
464Using this macro is equivalent to calling
465.Fn m_move_pkthdr to from .
466.It Fn M_WRITABLE mbuf
467This macro will evaluate true if
468.Fa mbuf
469is not marked
470.Dv M_RDONLY
471and if either
472.Fa mbuf
473does not contain external storage or,
474if it does,
475the reference count of the storage is not greater than 1.
476The
477.Dv M_RDONLY
478flag can be set in
479.Fa mbuf->m_flags .
480This can be achieved during setup of the external storage,
481by passing the
482.Dv M_RDONLY
483bit as a
484.Fa flags
485argument to the
486.Fn MEXTADD
487macro, or can be directly set in individual
488.Vt mbufs .
489.It Fn MCHTYPE mbuf type
490Change the type of
491.Fa mbuf
492to
493.Fa type .
494This is a relatively expensive operation and should be avoided.
495.El
496.Pp
497The functions are:
498.Bl -ohang -offset indent
499.It Fn m_get how type
500A function version of
501.Fn MGET
502for non-critical paths.
503.It Fn m_getm orig len how type
504Allocate
505.Fa len
506bytes worth of
507.Vt mbufs
508and
509.Vt mbuf clusters
510if necessary and append the resulting allocated
511.Vt mbuf chain
512to the
513.Vt mbuf chain
514.Fa orig ,
515if it is
516.No non- Ns Dv NULL .
517If the allocation fails at any point,
518free whatever was allocated and return
519.Dv NULL .
520If
521.Fa orig
522is
523.No non- Ns Dv NULL ,
524it will not be freed.
525It is possible to use
526.Fn m_getm
527to either append
528.Fa len
529bytes to an existing
530.Vt mbuf
531or
532.Vt mbuf chain
533(for example, one which may be sitting in a pre-allocated ring)
534or to simply perform an all-or-nothing
535.Vt mbuf
536and
537.Vt mbuf cluster
538allocation.
539.It Fn m_gethdr how type
540A function version of
541.Fn MGETHDR
542for non-critical paths.
543.It Fn m_getcl how type flags
544Fetch an
545.Vt mbuf
546with a
547.Vt mbuf cluster
548attached to it.
549If one of the allocations fails, the entire allocation fails.
550This routine is the preferred way of fetching both the
551.Vt mbuf
552and
553.Vt mbuf cluster
554together, as it avoids having to unlock/relock between allocations.
555Returns
556.Dv NULL
557on failure.
558.It Fn m_getclr how type
559Allocate an
560.Vt mbuf
561and zero out the data region.
562.It Fn m_free mbuf
563Frees
564.Vt mbuf .
565Returns
566.Va m_next
567of the freed
568.Vt mbuf .
569.El
570.Pp
571The functions below operate on
572.Vt mbuf chains .
573.Bl -ohang -offset indent
574.It Fn m_freem mbuf
575Free an entire
576.Vt mbuf chain ,
577including any external storage.
578.\"
579.It Fn m_adj mbuf len
580Trim
581.Fa len
582bytes from the head of an
583.Vt mbuf chain
584if
585.Fa len
586is positive, from the tail otherwise.
587.\"
588.It Fn m_append mbuf len cp
589Append
590.Fa len
591bytes of data
592.Fa cp
593to the
594.Vt mbuf chain .
595Extend the mbuf chain if the new data does not fit in
596existing space.
597.\"
598.It Fn m_prepend mbuf len how
599Allocate a new
600.Vt mbuf
601and prepend it to the
602.Vt mbuf chain ,
603handling
604.Dv M_PKTHDR
605properly.
606.Sy Note :
607It does not allocate any
608.Vt mbuf clusters ,
609so
610.Fa len
611must be less than
612.Dv MLEN
613or
614.Dv MHLEN ,
615depending on the
616.Dv M_PKTHDR
617flag setting.
618.\"
619.It Fn m_copyup mbuf len dstoff
620Similar to
621.Fn m_pullup
622but copies
623.Fa len
624bytes of data into a new mbuf at
625.Fa dstoff
626bytes into the mbuf.
627The
628.Fa dstoff
629argument aligns the data and leaves room for a link layer header.
630Return the new
631.Vt mbuf chain
632on success,
633and free the
634.Vt mbuf chain
635and return
636.Dv NULL
637on failure.
638.Sy Note :
639The function does not allocate
640.Vt mbuf clusters ,
641so
642.Fa len + dstoff
643must be less than
644.Dv MHLEN .
645.\"
646.It Fn m_pullup mbuf len
647Arrange that the first
648.Fa len
649bytes of an
650.Vt mbuf chain
651are contiguous and lie in the data area of
652.Fa mbuf ,
653so they are accessible with
654.Fn mtod mbuf type .
655Return the new
656.Vt mbuf chain
657on success,
658.Dv NULL
659on failure
660(the
661.Vt mbuf chain
662is freed in this case).
663.Sy Note :
664It does not allocate any
665.Vt mbuf clusters ,
666so
667.Fa len
668must be less than
669.Dv MHLEN .
670.\"
671.It Fn m_copym mbuf offset len how
672Make a copy of an
673.Vt mbuf chain
674starting
675.Fa offset
676bytes from the beginning, continuing for
677.Fa len
678bytes.
679If
680.Fa len
681is
682.Dv M_COPYALL ,
683copy to the end of the
684.Vt mbuf chain .
685.Sy Note :
686The copy is read-only, because the
687.Vt mbuf clusters
688are not copied, only their reference counts are incremented.
689.\"
690.It Fn m_copypacket mbuf how
691Copy an entire packet including header, which must be present.
692This is an optimized version of the common case
693.Fn m_copym mbuf 0 M_COPYALL how .
694.Sy Note :
695The copy is read-only, because the
696.Vt mbuf clusters
697are not copied, only their reference counts are incremented.
698.\"
699.It Fn m_dup mbuf how
700Copy a packet header
701.Vt mbuf chain
702into a completely new
703.Vt mbuf chain ,
704including copying any
705.Vt mbuf clusters .
706Use this instead of
707.Fn m_copypacket
708when you need a writable copy of an
709.Vt mbuf chain .
710.\"
711.It Fn m_copydata mbuf offset len buf
712Copy data from an
713.Vt mbuf chain
714starting
715.Fa offset
716bytes from the beginning, continuing for
717.Fa len
718bytes, into the indicated buffer
719.Fa buf .
720.\"
721.It Fn m_copyback mbuf offset len buf
722Copy
723.Fa len
724bytes from the buffer
725.Fa buf
726back into the indicated
727.Vt mbuf chain ,
728starting at
729.Fa offset
730bytes from the beginning of the
731.Vt mbuf chain ,
732extending the
733.Vt mbuf chain
734if necessary.
735.Sy Note :
736It does not allocate any
737.Vt mbuf clusters ,
738just adds
739.Vt mbufs
740to the
741.Vt mbuf chain .
742It is safe to set
743.Fa offset
744beyond the current
745.Vt mbuf chain
746end: zeroed
747.Vt mbufs
748will be allocated to fill the space.
749.\"
750.It Fn m_length mbuf last
751Return the length of the
752.Vt mbuf chain ,
753and optionally a pointer to the last
754.Vt mbuf .
755.\"
756.It Fn m_dup_pkthdr to from how
757Upon the function's completion, the
758.Vt mbuf
759.Fa to
760will contain an identical copy of
761.Fa from->m_pkthdr
762and the per-packet attributes found in the
763.Vt mbuf chain
764.Fa from .
765The
766.Vt mbuf
767.Fa from
768must have the flag
769.Dv M_PKTHDR
770initially set, and
771.Fa to
772must be empty on entry.
773.\"
774.It Fn m_move_pkthdr to from
775Move
776.Va m_pkthdr
777and the per-packet attributes from the
778.Vt mbuf chain
779.Fa from
780to the
781.Vt mbuf
782.Fa to .
783The
784.Vt mbuf
785.Fa from
786must have the flag
787.Dv M_PKTHDR
788initially set, and
789.Fa to
790must be empty on entry.
791Upon the function's completion,
792.Fa from
793will have the flag
794.Dv M_PKTHDR
795and the per-packet attributes cleared.
796.\"
797.It Fn m_fixhdr mbuf
798Set the packet-header length to the length of the
799.Vt mbuf chain .
800.\"
801.It Fn m_devget buf len offset ifp copy
802Copy data from device local memory pointed to by
803.Fa buf
804to an
805.Vt mbuf chain .
806The copy is done using a specified copy routine
807.Fa copy ,
808or
809.Fn bcopy
810if
811.Fa copy
812is
813.Dv NULL .
814.\"
815.It Fn m_cat m n
816Concatenate
817.Fa n
818to
819.Fa m .
820Both
821.Vt mbuf chains
822must be of the same type.
823.Fa n
824is still valid after the function returns.
825.Sy Note :
826It does not handle
827.Dv M_PKTHDR
828and friends.
829.\"
830.It Fn m_split mbuf len how
831Partition an
832.Vt mbuf chain
833in two pieces, returning the tail:
834all but the first
835.Fa len
836bytes.
837In case of failure, it returns
838.Dv NULL
839and attempts to restore the
840.Vt mbuf chain
841to its original state.
842.\"
843.It Fn m_apply mbuf off len f arg
844Apply a function to an
845.Vt mbuf chain ,
846at offset
847.Fa off ,
848for length
849.Fa len
850bytes.
851Typically used to avoid calls to
852.Fn m_pullup
853which would otherwise be unnecessary or undesirable.
854.Fa arg
855is a convenience argument which is passed to the callback function
856.Fa f .
857.Pp
858Each time
859.Fn f
860is called, it will be passed
861.Fa arg ,
862a pointer to the
863.Fa data
864in the current mbuf, and the length
865.Fa len
866of the data in this mbuf to which the function should be applied.
867.Pp
868The function should return zero to indicate success;
869otherwise, if an error is indicated, then
870.Fn m_apply
871will return the error and stop iterating through the
872.Vt mbuf chain .
873.\"
874.It Fn m_getptr mbuf loc off
875Return a pointer to the mbuf containing the data located at
876.Fa loc
877bytes from the beginning of the
878.Vt mbuf chain .
879The corresponding offset into the mbuf will be stored in
880.Fa *off .
881.It Fn m_defrag m0 how
882Defragment an mbuf chain, returning the shortest possible
883chain of mbufs and clusters.
884If allocation fails and this cannot be completed,
885.Dv NULL
886will be returned and the original chain will be unchanged.
887Upon success, the original chain will be freed and the new
888chain will be returned.
889.Fa how
890should be either
891.Dv M_TRYWAIT
892or
893.Dv M_DONTWAIT ,
894depending on the caller's preference.
895.Pp
896This function is especially useful in network drivers, where
897certain long mbuf chains must be shortened before being added
898to TX descriptor lists.
899.El
900.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
901This section currently applies to TCP/IP only.
902In order to save the host CPU resources, computing checksums is
903offloaded to the network interface hardware if possible.
904The
905.Va m_pkthdr
906member of the leading
907.Vt mbuf
908of a packet contains two fields used for that purpose,
909.Vt int Va csum_flags
910and
911.Vt int Va csum_data .
912The meaning of those fields depends on the direction a packet flows in,
913and on whether the packet is fragmented.
914Henceforth,
915.Va csum_flags
916or
917.Va csum_data
918of a packet
919will denote the corresponding field of the
920.Va m_pkthdr
921member of the leading
922.Vt mbuf
923in the
924.Vt mbuf chain
925containing the packet.
926.Pp
927On output, checksum offloading is attempted after the outgoing
928interface has been determined for a packet.
929The interface-specific field
930.Va ifnet.if_data.ifi_hwassist
931(see
932.Xr ifnet 9 )
933is consulted for the capabilities of the interface to assist in
934computing checksums.
935The
936.Va csum_flags
937field of the packet header is set to indicate which actions the interface
938is supposed to perform on it.
939The actions unsupported by the network interface are done in
940software prior to passing the packet down to the interface driver;
941such actions will never be requested through
942.Va csum_flags .
943.Pp
944The flags demanding a particular action from an interface are as follows:
945.Bl -tag -width ".Dv CSUM_TCP" -offset indent
946.It Dv CSUM_IP
947The IP header checksum is to be computed and stored in the
948corresponding field of the packet.
949The hardware is expected to know the format of an IP header
950to determine the offset of the IP checksum field.
951.It Dv CSUM_TCP
952The TCP checksum is to be computed.
953(See below.)
954.It Dv CSUM_UDP
955The UDP checksum is to be computed.
956(See below.)
957.El
958.Pp
959Should a TCP or UDP checksum be offloaded to the hardware,
960the field
961.Va csum_data
962will contain the byte offset of the checksum field relative to the
963end of the IP header.
964In this case, the checksum field will be initially
965set by the TCP/IP module to the checksum of the pseudo header
966defined by the TCP and UDP specifications.
967.Pp
968For outbound packets which have been fragmented
969by the host CPU, the following will also be true,
970regardless of the checksum flag settings:
971.Bl -bullet -offset indent
972.It
973all fragments will have the flag
974.Dv M_FRAG
975set in their
976.Va m_flags
977field;
978.It
979the first and the last fragments in the chain will have
980.Dv M_FIRSTFRAG
981or
982.Dv M_LASTFRAG
983set in their
984.Va m_flags ,
985correspondingly;
986.It
987the first fragment in the chain will have the total number
988of fragments contained in its
989.Va csum_data
990field.
991.El
992.Pp
993The last rule for fragmented packets takes precedence over the one
994for a TCP or UDP checksum.
995Nevertheless, offloading a TCP or UDP checksum is possible for a
996fragmented packet if the flag
997.Dv CSUM_IP_FRAGS
998is set in the field
999.Va ifnet.if_data.ifi_hwassist
1000associated with the network interface.
1001However, in this case the interface is expected to figure out
1002the location of the checksum field within the sequence of fragments
1003by itself because
1004.Va csum_data
1005contains a fragment count instead of a checksum offset value.
1006.Pp
1007On input, an interface indicates the actions it has performed
1008on a packet by setting one or more of the following flags in
1009.Va csum_flags
1010associated with the packet:
1011.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
1012.It Dv CSUM_IP_CHECKED
1013The IP header checksum has been computed.
1014.It Dv CSUM_IP_VALID
1015The IP header has a valid checksum.
1016This flag can appear only in combination with
1017.Dv CSUM_IP_CHECKED .
1018.It Dv CSUM_DATA_VALID
1019The checksum of the data portion of the IP packet has been computed
1020and stored in the field
1021.Va csum_data
1022in network byte order.
1023.It Dv CSUM_PSEUDO_HDR
1024Can be set only along with
1025.Dv CSUM_DATA_VALID
1026to indicate that the IP data checksum found in
1027.Va csum_data
1028allows for the pseudo header defined by the TCP and UDP specifications.
1029Otherwise the checksum of the pseudo header must be calculated by
1030the host CPU and added to
1031.Va csum_data
1032to obtain the final checksum to be used for TCP or UDP validation purposes.
1033.El
1034.Pp
1035If a particular network interface just indicates success or
1036failure of TCP or UDP checksum validation without returning
1037the exact value of the checksum to the host CPU, its driver can mark
1038.Dv CSUM_DATA_VALID
1039and
1040.Dv CSUM_PSEUDO_HDR
1041in
1042.Va csum_flags ,
1043and set
1044.Va csum_data
1045to
1046.Li 0xFFFF
1047hexadecimal to indicate a valid checksum.
1048It is a peculiarity of the algorithm used that the Internet checksum
1049calculated over any valid packet will be
1050.Li 0xFFFF
1051as long as the original checksum field is included.
1052.Pp
1053For inbound packets which are IP fragments, all
1054.Va csum_data
1055fields will be summed during reassembly to obtain the final checksum
1056value passed to an upper layer in the
1057.Va csum_data
1058field of the reassembled packet.
1059The
1060.Va csum_flags
1061fields of all fragments will be consolidated using logical AND
1062to obtain the final value for
1063.Va csum_flags .
1064Thus, in order to successfully
1065offload checksum computation for fragmented data,
1066all fragments should have the same value of
1067.Va csum_flags .
1068.Sh STRESS TESTING
1069When running a kernel compiled with the option
1070.Dv MBUF_STRESS_TEST ,
1071the following
1072.Xr sysctl 8 Ns
1073-controlled options may be used to create
1074various failure/extreme cases for testing of network drivers
1075and other parts of the kernel that rely on
1076.Vt mbufs .
1077.Bl -tag -width indent
1078.It Va net.inet.ip.mbuf_frag_size
1079Causes
1080.Fn ip_output
1081to fragment outgoing
1082.Vt mbuf chains
1083into fragments of the specified size.
1084Setting this variable to 1 is an excellent way to
1085test the long
1086.Vt mbuf chain
1087handling ability of network drivers.
1088.It Va kern.ipc.m_defragrandomfailures
1089Causes the function
1090.Fn m_defrag
1091to randomly fail, returning
1092.Dv NULL .
1093Any piece of code which uses
1094.Fn m_defrag
1095should be tested with this feature.
1096.El
1097.Sh RETURN VALUES
1098See above.
1099.Sh SEE ALSO
1100.Xr ifnet 9 ,
1101.Xr mbuf_tags 9
1102.Sh HISTORY
1103.\" Please correct me if I'm wrong
1104.Vt Mbufs
1105appeared in an early version of
1106.Bx .
1107Besides being used for network packets, they were used
1108to store various dynamic structures, such as routing table
1109entries, interface addresses, protocol control blocks, etc.
1110.Sh AUTHORS
1111The original
1112.Nm
1113man page was written by Yar Tikhiy.
1114