/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD$
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifdef INVARIANTS
#include <machine/cpu.h>
#endif

static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
char	*mclrefcnt;
struct mbstat mbstat;
struct mbuf *mmbfree;
union mcluster *mclfree;
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	mbuf_wait = 32;	/* mbuf sleep time in ticks; referenced by the
			   sysctl and tsleep calls below */
u_int	m_mballoc_wid = 0;
u_int	m_clalloc_wid = 0;

SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
	   &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
	   "Maximum number of mbufs available");
#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif
TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);

static void	m_reclaim __P((void));

/* "number of clusters of pages" */
#define NCL_INIT	1

#define NMB_INIT	16

/* ARGSUSED */
static void
mbinit(dummy)
	void *dummy;
{
	int s;

	mmbfree = NULL;
	mclfree = NULL;
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	s = splimp();
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
#if MCLBYTES <= PAGE_SIZE
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
#else
	/* It's OK to call contigmalloc in this context. */
	if (m_clalloc(16, M_WAIT) == 0)
		goto bad;
#endif
	splx(s);
	return;
bad:
	panic("mbinit");
}

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 * Must be called at splimp.
 */
/* ARGSUSED */
int
m_mballoc(nmb, how)
	register int nmb;
	int how;
{
	register caddr_t p;
	register int i;
	int nbytes;

	/*
	 * If we've hit the mbuf limit, stop allocating (or trying to
	 * allocate) from mb_map, in order to avoid dipping into the
	 * section of mb_map which we've "reserved" for clusters.
	 */
	if ((nmb + mbstat.m_mbufs) > nmbufs)
		return (0);

	/*
	 * Once we run out of map space, it will be impossible to get
	 * any more (nothing is ever freed back to the map).  However,
	 * all is not lost: m_reclaim() may still be able to free a
	 * substantial amount of space.
	 *
	 * XXX Furthermore, we can also work with "recycled" mbufs: when
	 * we're called with M_WAIT, the sleeping procedure will be woken
	 * up when an mbuf is freed.  See m_mballoc_wait().
	 */
	if (mb_map_full)
		return (0);

	nbytes = round_page(nmb * MSIZE);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
	if (p == 0 && how == M_WAIT) {
		mbstat.m_wait++;
		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
	}

	/*
	 * Either the map is now full, or `how' is M_NOWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	nmb = nbytes / MSIZE;
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree;
		mmbfree = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	return (1);
}

/*
 * Once mb_map has been exhausted and the allocation macros (or, in some
 * cases, functions) are called with M_WAIT, we must rely solely on reclaimed
 * mbufs.  Here we wait up to a designated (mbuf_wait) time for an mbuf to be
 * freed.
 */
struct mbuf *
m_mballoc_wait(int caller, int type)
{
	struct mbuf *p;
	int s;

	m_mballoc_wid++;
	if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK)
		m_mballoc_wid--;

	/*
	 * Now that we think we've got something, we will redo the
	 * MGET, but avoid getting into another instance of m_mballoc_wait().
	 * XXX: We retry the fetch _even_ if the sleep timed out.  This is
	 *      done purposely, for the [unlikely] case that an mbuf was
	 *      freed but the sleeper was not awakened in time.
	 */
	p = NULL;
	switch (caller) {
	case MGET_C:
		MGET(p, M_DONTWAIT, type);
		break;
	case MGETHDR_C:
		MGETHDR(p, M_DONTWAIT, type);
		break;
	default:
		panic("m_mballoc_wait: invalid caller (%d)", caller);
	}

	s = splimp();
	if (p != NULL) {		/* We waited and got something... */
		mbstat.m_wait++;
		/* Wake up another if we have more free. */
		if (mmbfree != NULL)
			MMBWAKEUP();
	}
	splx(s);
	return (p);
}
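
/*
 * Illustrative sketch (not part of the original source): a caller normally
 * reaches m_mballoc_wait() through the MGET()/MGETHDR() macros; the exact
 * expansion lives in <sys/mbuf.h> of this era, not in this file, so the
 * chain below is only a rough summary of the flow for an M_WAIT allocation:
 *
 *	MGET(m, M_WAIT, MT_DATA);
 *		(free list empty, m_mballoc() fails -> macro calls)
 *	m_retry(M_WAIT, MT_DATA);
 *		(m_reclaim() plus one more MGET attempt; if that also
 *		 fails, the macro falls back to)
 *	m_mballoc_wait(MGET_C, MT_DATA);
 *		(tsleep() until an mbuf is freed or mbuf_wait expires)
 */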

#if MCLBYTES > PAGE_SIZE
static int i_want_my_mcl;

static void
kproc_mclalloc(void)
{
	int status;

	while (1) {
		tsleep(&i_want_my_mcl, PVM, "mclalloc", 0);

		for (; i_want_my_mcl; i_want_my_mcl--) {
			if (m_clalloc(1, M_WAIT) == 0)
				printf("m_clalloc failed even in process context!\n");
		}
	}
}

static struct proc *mclallocproc;
static struct kproc_desc mclalloc_kp = {
	"mclalloc",
	kproc_mclalloc,
	&mclallocproc
};
SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
	   &mclalloc_kp);
#endif

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 * Must be called at splimp.
 */
/* ARGSUSED */
int
m_clalloc(ncl, how)
	register int ncl;
	int how;
{
	register caddr_t p;
	register int i;
	int npg;

	/*
	 * If we've hit the mcluster number limit, stop allocating (or
	 * trying to allocate) from mb_map, in order to avoid dipping
	 * into the section of mb_map which we've "reserved" for mbufs.
	 */
	if ((ncl + mbstat.m_clusters) > nmbclusters) {
		mbstat.m_drops++;
		return (0);
	}

	/*
	 * Once we run out of map space, it will be impossible
	 * to get any more (nothing is ever freed back to the
	 * map).  From this point on, we solely rely on freed
	 * mclusters.
	 */
	if (mb_map_full) {
		mbstat.m_drops++;
		return (0);
	}

#if MCLBYTES > PAGE_SIZE
	if (how != M_WAIT) {
		i_want_my_mcl += ncl;
		wakeup(&i_want_my_mcl);
		mbstat.m_wait++;
		p = 0;
	} else {
		p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul,
				  ~0ul, PAGE_SIZE, 0, mb_map);
	}
#else
	npg = ncl;
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
				 how != M_WAIT ? M_NOWAIT : M_WAITOK);
	ncl = ncl * PAGE_SIZE / MCLBYTES;
#endif
	/*
	 * Either the map is now full, or `how' is M_NOWAIT and there
	 * are no pages left.
	 */
	if (p == NULL) {
		mbstat.m_drops++;
		return (0);
	}

	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree;
		mclfree = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_WAIT, we rely on the mclfree union pointers.  If nothing is free, we will
 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
 * due to sudden mcluster availability.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p;
	int s;

#ifdef __i386__
	/* If in interrupt context, and INVARIANTS is on, maintain sanity and die. */
	KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT"));
#endif

	/* Sleep until something's available or until we expire. */
	m_clalloc_wid++;
	if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK)
		m_clalloc_wid--;

	/*
	 * Now that we think we've got something, we will retry the
	 * MCLALLOC, but avoid getting into another instance of
	 * m_clalloc_wait().
	 */
	p = NULL;
	MCLALLOC(p, M_DONTWAIT);

	s = splimp();
	if (p != NULL) {	/* We waited and got something... */
		mbstat.m_wait++;
		/* Wake up another if we have more free. */
		if (mclfree != NULL)
			MCLWAKEUP();
	}

	splx(s);
	return (p);
}

/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	/*
	 * Must only do the reclaim if not in an interrupt context.
	 */
	if (i == M_WAIT) {
#ifdef __i386__
		KASSERT(intr_nesting_level == 0,
		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
#endif
		m_reclaim();
	}

	/*
	 * Both m_mballoc_wait and m_retry must be nulled because
	 * when the MGET macro is run from here, we definitely do _not_
	 * want to enter an instance of m_mballoc_wait() or m_retry() (again!).
	 */
#define m_mballoc_wait(caller,type)    (struct mbuf *)0
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
#undef m_mballoc_wait

	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;

	return (m);
}

/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	register struct mbuf *m;

	/*
	 * Must only do the reclaim if not in an interrupt context.
	 */
	if (i == M_WAIT) {
#ifdef __i386__
		KASSERT(intr_nesting_level == 0,
		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
#endif
		m_reclaim();
	}

#define m_mballoc_wait(caller,type)    (struct mbuf *)0
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
#undef m_mballoc_wait

	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;

	return (m);
}

static void
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;
	int s = splimp();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	return (m);
}
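
/*
 * Illustrative sketch (not part of the original source): the function and
 * macro forms are interchangeable; the macro avoids a function call on
 * critical paths, the function keeps code size down elsewhere:
 *
 *	struct mbuf *m;
 *
 *	m = m_get(M_DONTWAIT, MT_DATA);		(function form)
 *	MGET(m, M_DONTWAIT, MT_DATA);		(macro form)
 *	if (m == NULL)
 *		return (ENOBUFS);
 */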

struct mbuf *
m_gethdr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	if (m == 0)
		return (0);
	bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	register struct mbuf *m;
{
	register struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
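
/*
 * Illustrative sketch (not part of the original source): callers normally
 * go through the M_PREPEND() macro, which takes this routine only when the
 * first mbuf has no leading space for the new header:
 *
 *	M_PREPEND(m, sizeof(struct somehdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	(chain already freed on failure)
 *
 * "struct somehdr" is a placeholder for whatever header is being prepended.
 */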

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf
 * chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
#define MCFail (mbstat.m_mcfail)

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			if (!m->m_ext.ext_ref)
				mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			else
				(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
							m->m_ext.ext_size);
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else
			bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
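
/*
 * Illustrative sketch (not part of the original source): a typical use is
 * taking a read-only reference copy of part of a packet, e.g. for
 * retransmission, while the original chain stays queued:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, off, len, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * "off" and "len" are placeholder byte counts.  Because clusters are shared
 * rather than copied, neither chain's cluster data may be written to
 * afterwards; use m_dup() below when a writable copy is needed.
 */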

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		if (!m->m_ext.ext_ref)
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		else
			(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
						m->m_ext.ext_size);
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
	} else {
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			if (!m->m_ext.ext_ref)
				mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			else
				(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
							m->m_ext.ext_size);
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	register struct mbuf *m;
	register int off;
	register int len;
	caddr_t cp;
{
	register unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
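
/*
 * Illustrative sketch (not part of the original source): m_copydata() is
 * the usual way to extract a header that may span mbufs into local storage
 * without modifying the chain:
 *
 *	struct somehdr hdr;		(placeholder header type)
 *
 *	if (m->m_pkthdr.len < off + sizeof(hdr))
 *		return (EINVAL);
 *	m_copydata(m, off, sizeof(hdr), (caddr_t)&hdr);
 *
 * "off" is a placeholder offset into the chain.
 */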

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (0);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
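
/*
 * Illustrative sketch (not part of the original source): use m_dup() when
 * the copy must be written to, since it deep-copies cluster data instead
 * of bumping reference counts:
 *
 *	struct mbuf *n;
 *
 *	n = m_dup(m, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *	(n may now be modified freely without affecting m)
 */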

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	register struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}
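
/*
 * Illustrative sketch (not part of the original source): a positive count
 * trims from the head of the chain, a negative one from the tail.  E.g. to
 * strip a 14-byte link-level header and then a 4-byte trailer:
 *
 *	m_adj(m, 14);		(drop the first 14 bytes)
 *	m_adj(m, -4);		(drop the last 4 bytes)
 */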

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
#define MPFail (mbstat.m_mpfail)

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
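
/*
 * Illustrative sketch (not part of the original source): the classic use
 * is making a protocol header contiguous before casting it:
 *
 *	struct ip *ip;
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;			(chain was freed by m_pullup)
 *	ip = mtod(m, struct ip *);
 */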

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		if (!m->m_ext.ext_ref)
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		else
			(*(m->m_ext.ext_ref))(m->m_ext.ext_buf,
						m->m_ext.ext_size);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
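
/*
 * Illustrative sketch (not part of the original source): m_split() is the
 * natural primitive for fragmentation, e.g. peeling off everything past
 * the first "mtu" bytes of a packet:
 *
 *	struct mbuf *rest;
 *
 *	rest = m_split(m, mtu, M_DONTWAIT);
 *	if (rest == NULL)
 *		return (ENOBUFS);	(m is restored on failure)
 *
 * "mtu" here is just a placeholder byte count.
 */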

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((char *from, caddr_t to, u_int len));
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
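
/*
 * Illustrative sketch (not part of the original source): a network driver's
 * receive path might use m_devget() to copy a frame out of board memory
 * into a fresh chain:
 *
 *	struct mbuf *m;
 *
 *	m = m_devget(sc_rxbuf, pktlen, 0, &sc_if, NULL);
 *	if (m == NULL)
 *		return;			(drop the frame)
 *
 * "sc_rxbuf", "pktlen" and "sc_if" are placeholders for the driver's
 * receive buffer, frame length and ifnet; passing a NULL copy function
 * selects plain bcopy().
 */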

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
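
/*
 * Illustrative sketch (not part of the original source): m_copyback() is
 * the write-side counterpart of m_copydata(), e.g. for overwriting a field
 * somewhere in the chain:
 *
 *	u_short cksum = 0;
 *
 *	m_copyback(m, off, sizeof(cksum), (caddr_t)&cksum);
 *
 * "off" is a placeholder offset.  Note that the routine extends the chain
 * with M_DONTWAIT allocations if off + len runs past the end, and gives up
 * silently if those allocations fail.
 */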

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}
1186