xref: /freebsd/sys/kern/uipc_mbuf.c (revision d2387d42b8da231a5b95cbc313825fb2aadf26f6)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include "opt_mac.h"
40 #include "opt_param.h"
41 #include "opt_mbuf_stress_test.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/limits.h>
47 #include <sys/lock.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/sysctl.h>
52 #include <sys/domain.h>
53 #include <sys/protosw.h>
54 #include <sys/uio.h>
55 
/* Header-size limits; tuned by the protocol/link layers, exported below. */
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
#ifdef MBUF_STRESS_TEST
/* Statistics maintained by m_defrag() when stress testing is compiled in. */
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;		/* RW: force random m_defrag() failures */
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif
91 
/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * Ownership of the packet tags transfers to "to": "from" is left with
 * an empty tag list and its M_PKTHDR flag cleared.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
	KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster"));
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;		/* point at internal storage */
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}
121 
/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 * Returns non-zero on success, 0 when the tag copy fails (callers
 * test with "if (!m_dup_pkthdr(...))").
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR. Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip.  For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
#ifdef MAC
	/* Drop any tags "to" already owns before they are overwritten. */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	/* Keep "to"'s cluster flag; everything else copies from "from". */
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	/* Start with an empty tag list; m_tag_copy_chain() deep-copies. */
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}
154 
155 /*
156  * Lesser-used path for M_PREPEND:
157  * allocate new mbuf to prepend to chain,
158  * copy junk along.
159  */
160 struct mbuf *
161 m_prepend(struct mbuf *m, int len, int how)
162 {
163 	struct mbuf *mn;
164 
165 	if (m->m_flags & M_PKTHDR)
166 		MGETHDR(mn, how, m->m_type);
167 	else
168 		MGET(mn, how, m->m_type);
169 	if (mn == NULL) {
170 		m_freem(m);
171 		return (NULL);
172 	}
173 	if (m->m_flags & M_PKTHDR)
174 		M_MOVE_PKTHDR(mn, m);
175 	mn->m_next = m;
176 	m = mn;
177 	if (len < MHLEN)
178 		MH_ALIGN(m, len);
179 	m->m_len = len;
180 	return (m);
181 }
182 
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	/* Only a copy starting at the front can carry the pkthdr along. */
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	/* Skip mbufs that lie entirely before the starting offset. */
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			/* Running off the end is only legal for M_COPYALL. */
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			MGETHDR(n, wait, m->m_type);
		else
			MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			/* Fix the copied header's notion of packet length. */
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/* Share the cluster; just bump its refcount. */
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	/* Allocation failed: discard the partial copy built so far. */
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
257 
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	/* Copy the lead mbuf, including a deep copy of its pkthdr/tags. */
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		/* Share the cluster rather than copying its contents. */
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		/* Mirror the original's leading free space (alignment). */
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	/* Copy the rest of the chain one mbuf at a time. */
	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	/* Allocation failed: free the partial copy (top may be NULL). */
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}
317 
318 /*
319  * Copy data from an mbuf chain starting "off" bytes from the beginning,
320  * continuing for "len" bytes, into the indicated buffer.
321  */
322 void
323 m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
324 {
325 	u_int count;
326 
327 	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
328 	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
329 	while (off > 0) {
330 		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
331 		if (off < m->m_len)
332 			break;
333 		off -= m->m_len;
334 		m = m->m_next;
335 	}
336 	while (len > 0) {
337 		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
338 		count = min(m->m_len - off, len);
339 		bcopy(mtod(m, caddr_t) + off, cp, count);
340 		len -= count;
341 		cp += count;
342 		off = 0;
343 		m = m->m_next;
344 	}
345 }
346 
347 /*
348  * Copy a packet header mbuf chain into a completely new chain, including
349  * copying any mbuf clusters.  Use this instead of m_copypacket() when
350  * you need a writable copy of an mbuf chain.
351  */
352 struct mbuf *
353 m_dup(struct mbuf *m, int how)
354 {
355 	struct mbuf **p, *top = NULL;
356 	int remain, moff, nsize;
357 
358 	/* Sanity check */
359 	if (m == NULL)
360 		return (NULL);
361 	M_ASSERTPKTHDR(m);
362 
363 	/* While there's more data, get a new mbuf, tack it on, and fill it */
364 	remain = m->m_pkthdr.len;
365 	moff = 0;
366 	p = &top;
367 	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
368 		struct mbuf *n;
369 
370 		/* Get the next new mbuf */
371 		MGET(n, how, m->m_type);
372 		if (n == NULL)
373 			goto nospace;
374 		if (top == NULL) {		/* first one, must be PKTHDR */
375 			if (!m_dup_pkthdr(n, m, how))
376 				goto nospace;
377 			nsize = MHLEN;
378 		} else				/* not the first one */
379 			nsize = MLEN;
380 		if (remain >= MINCLSIZE) {
381 			MCLGET(n, how);
382 			if ((n->m_flags & M_EXT) == 0) {
383 				(void)m_free(n);
384 				goto nospace;
385 			}
386 			nsize = MCLBYTES;
387 		}
388 		n->m_len = 0;
389 
390 		/* Link it into the new chain */
391 		*p = n;
392 		p = &n->m_next;
393 
394 		/* Copy data from original mbuf(s) into new mbuf */
395 		while (n->m_len < nsize && m != NULL) {
396 			int chunk = min(nsize - n->m_len, m->m_len - moff);
397 
398 			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
399 			moff += chunk;
400 			n->m_len += chunk;
401 			remain -= chunk;
402 			if (moff == m->m_len) {
403 				m = m->m_next;
404 				moff = 0;
405 			}
406 		}
407 
408 		/* Check correct total mbuf length */
409 		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
410 		    	("%s: bogus m_pkthdr.len", __func__));
411 	}
412 	return (top);
413 
414 nospace:
415 	m_freem(top);
416 	mbstat.m_mcfail++;	/* XXX: No consistency. */
417 	return (NULL);
418 }
419 
420 /*
421  * Concatenate mbuf chain n to m.
422  * Both chains must be of the same type (e.g. MT_DATA).
423  * Any m_pkthdr is not updated.
424  */
425 void
426 m_cat(struct mbuf *m, struct mbuf *n)
427 {
428 	while (m->m_next)
429 		m = m->m_next;
430 	while (n) {
431 		if (m->m_flags & M_EXT ||
432 		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
433 			/* just join the two chains */
434 			m->m_next = n;
435 			return;
436 		}
437 		/* splat the data from one into the other */
438 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
439 		    (u_int)n->m_len);
440 		m->m_len += n->m_len;
441 		n = m_free(n);
442 	}
443 }
444 
/*
 * Trim "req_len" bytes of data from the chain "mp": from the head when
 * req_len is positive, from the tail when it is negative.  The pkthdr
 * length, if present, is updated to match.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				/* Entire mbuf's data is consumed. */
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		/* "len" now holds whatever a too-short chain couldn't trim. */
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			/* Fast path: the last mbuf absorbs the whole trim. */
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		/* Trailing mbufs stay on the chain but are emptied. */
		while (m->m_next)
			(m = m->m_next) ->m_len = 0;
	}
}
516 
/*
 * Rearange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;	/* bytes still needed from the rest */
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/* Pull at least len, at most max_protohdr, bytes forward. */
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);	/* source mbuf fully drained */
	} while (len > 0 && n);
	if (len > 0) {
		/* Chain too short to supply len contiguous bytes. */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}
579 
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	/* Find the mbuf containing the split point. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;	/* bytes that belong to the tail */
	if (m0->m_flags & M_PKTHDR) {
		/* The tail of a packet needs its own pkthdr mbuf. */
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			/* Recurse: split m itself, hang result off n. */
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* Split falls exactly on an mbuf boundary. */
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the cluster between head and tail. */
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
/*
 * Routine to copy from device local memory into mbufs.
 * Note that `off' argument is offset into first mbuf of target chain from
 * which to begin copying the data to.
 * Returns the new chain, or NULL on bad "off" or allocation failure.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
	 void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	/* First mbuf carries the packet header. */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	len = MHLEN;

	while (totlen > 0) {
		if (top) {
			/* Follow-on mbufs are plain (no pkthdr). */
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}
		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}
		if (off) {
			/* Caller-requested leading pad; applied only once. */
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		/* Use the device-supplied copy routine when provided. */
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
710 
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.  Allocation failures silently truncate the
 * operation (best effort); the pkthdr length is raised if the copy
 * extended past it.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	/* Walk -- and if needed grow -- the chain out to offset "off". */
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			/* Extend with a zeroed mbuf; stop quietly on OOM. */
			n = m_get_clrd(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min (m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			/* Append another mbuf for the remaining bytes. */
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
759 
760 /*
761  * Apply function f to the data in an mbuf chain starting "off" bytes from
762  * the beginning, continuing for "len" bytes.
763  */
764 int
765 m_apply(struct mbuf *m, int off, int len,
766     int (*f)(void *, void *, u_int), void *arg)
767 {
768 	u_int count;
769 	int rval;
770 
771 	KASSERT(off >= 0, ("m_apply, negative off %d", off));
772 	KASSERT(len >= 0, ("m_apply, negative len %d", len));
773 	while (off > 0) {
774 		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
775 		if (off < m->m_len)
776 			break;
777 		off -= m->m_len;
778 		m = m->m_next;
779 	}
780 	while (len > 0) {
781 		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
782 		count = min(m->m_len - off, len);
783 		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
784 		if (rval)
785 			return (rval);
786 		len -= count;
787 		off = 0;
788 		m = m->m_next;
789 	}
790 	return (0);
791 }
792 
793 /*
794  * Return a pointer to mbuf/offset of location in mbuf chain.
795  */
796 struct mbuf *
797 m_getptr(struct mbuf *m, int loc, int *off)
798 {
799 
800 	while (loc >= 0) {
801 		/* Normal end of search. */
802 		if (m->m_len > loc) {
803 			*off = loc;
804 			return (m);
805 		} else {
806 			loc -= m->m_len;
807 			if (m->m_next == NULL) {
808 				if (loc == 0) {
809 					/* Point at the end of valid data. */
810 					*off = m->m_len;
811 					return (m);
812 				}
813 				return (NULL);
814 			}
815 			m = m->m_next;
816 		}
817 	}
818 	return (NULL);
819 }
820 
821 void
822 m_print(const struct mbuf *m)
823 {
824 	int len;
825 	const struct mbuf *m2;
826 
827 	len = m->m_pkthdr.len;
828 	m2 = m;
829 	while (len) {
830 		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
831 		len -= m2->m_len;
832 		m2 = m2->m_next;
833 	}
834 	return;
835 }
836 
837 u_int
838 m_fixhdr(struct mbuf *m0)
839 {
840 	u_int len;
841 
842 	len = m_length(m0, NULL);
843 	m0->m_pkthdr.len = len;
844 	return (len);
845 }
846 
847 u_int
848 m_length(struct mbuf *m0, struct mbuf **last)
849 {
850 	struct mbuf *m;
851 	u_int len;
852 
853 	len = 0;
854 	for (m = m0; m != NULL; m = m->m_next) {
855 		len += m->m_len;
856 		if (m->m_next == NULL)
857 			break;
858 	}
859 	if (last != NULL)
860 		*last = m;
861 	return (len);
862 }
863 
/*
 * Defragment a mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet header is passed in, the original
 * mbuf (chain?) will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	/* Optionally fail at random to exercise callers' error paths. */
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	/* Lead mbuf of the new chain: use a cluster only if needed. */
	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	/* Copy the payload into the new chain in MCLBYTES-sized chunks. */
	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	/* Success: the compacted chain replaces the original. */
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	/* Here m_new is either NULL or a not-yet-linked follow-on mbuf. */
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
949 
#ifdef MBUF_STRESS_TEST

/*
 * Fragment an mbuf chain.  There's no reason you'd ever want to do
 * this in normal usage, but it's great for stress testing various
 * mbuf consumers.
 *
 * If fragmentation is not possible, the original chain will be
 * returned.
 *
 * Possible length values:
 * 0	 no fragmentation will occur
 * > 0	each fragment will be of the specified length
 * -1	each fragment will be the same random value in length
 * -2	each fragment's length will be entirely random
 * (Random values range from 1 to 256)
 */
struct mbuf *
m_fragment(struct mbuf *m0, int how, int length)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if ((length == 0) || (length < -2))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	m_final = m_getcl(how, MT_DATA, M_PKTHDR);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	/* length == -1: one random size shared by every fragment. */
	if (length == -1)
		length = 1 + (arc4random() & 255);

	while (progress < m0->m_pkthdr.len) {
		int fraglen;

		if (length > 0)
			fraglen = length;
		else
			fraglen = 1 + (arc4random() & 255);	/* -2 case */
		if (fraglen > m0->m_pkthdr.len - progress)
			fraglen = m0->m_pkthdr.len - progress;

		if (fraglen > MCLBYTES)
			fraglen = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getcl(how, MT_DATA, 0);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
		progress += fraglen;
		m_new->m_len = fraglen;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_freem(m0);
	m0 = m_final;
	return (m0);
nospace:
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	/* Return the original chain on failure */
	return (m0);
}

#endif
1033 
1034 struct mbuf *
1035 m_uiotombuf(struct uio *uio, int how, int len)
1036 {
1037 	struct mbuf *m_new = NULL, *m_final = NULL;
1038 	int progress = 0, error = 0, length, total;
1039 
1040 	if (len > 0)
1041 		total = min(uio->uio_resid, len);
1042 	else
1043 		total = uio->uio_resid;
1044 	if (total > MHLEN)
1045 		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
1046 	else
1047 		m_final = m_gethdr(how, MT_DATA);
1048 	if (m_final == NULL)
1049 		goto nospace;
1050 	m_new = m_final;
1051 	while (progress < total) {
1052 		length = total - progress;
1053 		if (length > MCLBYTES)
1054 			length = MCLBYTES;
1055 		if (m_new == NULL) {
1056 			if (length > MLEN)
1057 				m_new = m_getcl(how, MT_DATA, 0);
1058 			else
1059 				m_new = m_get(how, MT_DATA);
1060 			if (m_new == NULL)
1061 				goto nospace;
1062 		}
1063 		error = uiomove(mtod(m_new, void *), length, uio);
1064 		if (error)
1065 			goto nospace;
1066 		progress += length;
1067 		m_new->m_len = length;
1068 		if (m_new != m_final)
1069 			m_cat(m_final, m_new);
1070 		m_new = NULL;
1071 	}
1072 	m_fixhdr(m_final);
1073 	return (m_final);
1074 nospace:
1075 	if (m_new)
1076 		m_free(m_new);
1077 	if (m_final)
1078 		m_freem(m_final);
1079 	return (NULL);
1080 }
1081