xref: /freebsd/sys/kern/uipc_mbuf.c (revision d82e286489da73321a47e329d98a98817b0438b6)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34  * $Id: uipc_mbuf.c,v 1.12 1995/09/09 18:10:12 davidg Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/malloc.h>
41 #define MBTYPES
42 #include <sys/mbuf.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45 #include <sys/domain.h>
46 #include <sys/protosw.h>
47 
48 #include <vm/vm.h>
49 #include <vm/vm_kern.h>
50 
51 /*
52  * System initialization
53  */
54 
55 static void mbinit __P((void *));
56 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
57 
58 
59 struct mbuf *mbutl;
60 char	*mclrefcnt;
61 struct mbstat mbstat;
62 union mcluster *mclfree;
63 int	max_linkhdr;
64 int	max_protohdr;
65 int	max_hdr;
66 int	max_datalen;
67 
68 /* ARGSUSED*/
69 static void
70 mbinit(udata)
71 	void *udata;		/* not used*/
72 {
73 	int s;
74 
75 #if CLBYTES < 4096
76 #define NCL_INIT	(4096/CLBYTES)
77 #else
78 #define NCL_INIT	1
79 #endif
80 	s = splimp();
81 	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
82 		goto bad;
83 	splx(s);
84 	return;
85 bad:
86 	panic("mbinit");
87 }
88 
89 /*
90  * Allocate some number of mbuf clusters
91  * and place on cluster free list.
92  * Must be called at splimp.
93  */
94 /* ARGSUSED */
95 int
96 m_clalloc(ncl, nowait)
97 	register int ncl;
98 	int nowait;
99 {
100 	register caddr_t p;
101 	register int i;
102 	int npg;
103 
104 	/*
105 	 * Once we run out of map space, it will be impossible
106 	 * to get any more (nothing is ever freed back to the
107 	 * map).
108 	 */
109 	if (mb_map_full)
110 		return (0);
111 
112 	npg = ncl * CLSIZE;
113 	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
114 				 nowait ? M_NOWAIT : M_WAITOK);
115 	/*
116 	 * Either the map is now full, or this is nowait and there
117 	 * are no pages left.
118 	 */
119 	if (p == NULL)
120 		return (0);
121 
122 	ncl = ncl * CLBYTES / MCLBYTES;
123 	for (i = 0; i < ncl; i++) {
124 		((union mcluster *)p)->mcl_next = mclfree;
125 		mclfree = (union mcluster *)p;
126 		p += MCLBYTES;
127 		mbstat.m_clfree++;
128 	}
129 	mbstat.m_clusters += ncl;
130 	return (1);
131 }
132 
133 /*
134  * When MGET failes, ask protocols to free space when short of memory,
135  * then re-attempt to allocate an mbuf.
136  */
137 struct mbuf *
138 m_retry(i, t)
139 	int i, t;
140 {
141 	register struct mbuf *m;
142 
143 	m_reclaim();
144 #define m_retry(i, t)	(struct mbuf *)0
145 	MGET(m, i, t);
146 #undef m_retry
147 	if (m != NULL)
148 		mbstat.m_wait++;
149 	else
150 		mbstat.m_drops++;
151 	return (m);
152 }
153 
154 /*
155  * As above; retry an MGETHDR.
156  */
157 struct mbuf *
158 m_retryhdr(i, t)
159 	int i, t;
160 {
161 	register struct mbuf *m;
162 
163 	m_reclaim();
164 #define m_retryhdr(i, t) (struct mbuf *)0
165 	MGETHDR(m, i, t);
166 #undef m_retryhdr
167 	if (m != NULL)
168 		mbstat.m_wait++;
169 	else
170 		mbstat.m_drops++;
171 	return (m);
172 }
173 
174 void
175 m_reclaim()
176 {
177 	register struct domain *dp;
178 	register struct protosw *pr;
179 	int s = splimp();
180 
181 	for (dp = domains; dp; dp = dp->dom_next)
182 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
183 			if (pr->pr_drain)
184 				(*pr->pr_drain)();
185 	splx(s);
186 	mbstat.m_drain++;
187 }
188 
189 /*
190  * Space allocation routines.
191  * These are also available as macros
192  * for critical paths.
193  */
/*
 * Function form of the MGET macro.
 * Returns whatever MGET leaves in its first argument.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGET(mb, nowait, type);
	return (mb);
}
203 
/*
 * Function form of the MGETHDR macro.
 * Returns whatever MGETHDR leaves in its first argument.
 */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
213 
214 struct mbuf *
215 m_getclr(nowait, type)
216 	int nowait, type;
217 {
218 	register struct mbuf *m;
219 
220 	MGET(m, nowait, type);
221 	if (m == 0)
222 		return (0);
223 	bzero(mtod(m, caddr_t), MLEN);
224 	return (m);
225 }
226 
/*
 * Function form of the MFREE macro: release the single mbuf "m" and
 * return the pointer MFREE stores in its second argument, which
 * callers in this file treat as the rest of the chain.
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *rest;

	MFREE(m, rest);
	return (rest);
}
236 
237 void
238 m_freem(m)
239 	register struct mbuf *m;
240 {
241 	register struct mbuf *n;
242 
243 	if (m == NULL)
244 		return;
245 	do {
246 		MFREE(m, n);
247 		m = n;
248 	} while (m);
249 }
250 
251 /*
252  * Mbuffer utility routines.
253  */
254 
255 /*
256  * Lesser-used path for M_PREPEND:
257  * allocate new mbuf to prepend to chain,
258  * copy junk along.
259  */
260 struct mbuf *
261 m_prepend(m, len, how)
262 	register struct mbuf *m;
263 	int len, how;
264 {
265 	struct mbuf *mn;
266 
267 	MGET(mn, how, m->m_type);
268 	if (mn == (struct mbuf *)NULL) {
269 		m_freem(m);
270 		return ((struct mbuf *)NULL);
271 	}
272 	if (m->m_flags & M_PKTHDR) {
273 		M_COPY_PKTHDR(mn, m);
274 		m->m_flags &= ~M_PKTHDR;
275 	}
276 	mn->m_next = m;
277 	m = mn;
278 	if (len < MHLEN)
279 		MH_ALIGN(m, len);
280 	m->m_len = len;
281 	return (m);
282 }
283 
284 /*
285  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
286  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
287  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
288  */
289 int MCFail;
290 
291 struct mbuf *
292 m_copym(m, off0, len, wait)
293 	register struct mbuf *m;
294 	int off0, wait;
295 	register int len;
296 {
297 	register struct mbuf *n, **np;
298 	register int off = off0;
299 	struct mbuf *top;
300 	int copyhdr = 0;
301 
302 	if (off < 0 || len < 0)
303 		panic("m_copym");
304 	if (off == 0 && m->m_flags & M_PKTHDR)
305 		copyhdr = 1;
306 	while (off > 0) {
307 		if (m == 0)
308 			panic("m_copym");
309 		if (off < m->m_len)
310 			break;
311 		off -= m->m_len;
312 		m = m->m_next;
313 	}
314 	np = &top;
315 	top = 0;
316 	while (len > 0) {
317 		if (m == 0) {
318 			if (len != M_COPYALL)
319 				panic("m_copym");
320 			break;
321 		}
322 		MGET(n, wait, m->m_type);
323 		*np = n;
324 		if (n == 0)
325 			goto nospace;
326 		if (copyhdr) {
327 			M_COPY_PKTHDR(n, m);
328 			if (len == M_COPYALL)
329 				n->m_pkthdr.len -= off0;
330 			else
331 				n->m_pkthdr.len = len;
332 			copyhdr = 0;
333 		}
334 		n->m_len = min(len, m->m_len - off);
335 		if (m->m_flags & M_EXT) {
336 			n->m_data = m->m_data + off;
337 			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
338 			n->m_ext = m->m_ext;
339 			n->m_flags |= M_EXT;
340 		} else
341 			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
342 			    (unsigned)n->m_len);
343 		if (len != M_COPYALL)
344 			len -= n->m_len;
345 		off = 0;
346 		m = m->m_next;
347 		np = &n->m_next;
348 	}
349 	if (top == 0)
350 		MCFail++;
351 	return (top);
352 nospace:
353 	m_freem(top);
354 	MCFail++;
355 	return (0);
356 }
357 
358 /*
359  * Copy data from an mbuf chain starting "off" bytes from the beginning,
360  * continuing for "len" bytes, into the indicated buffer.
361  */
362 void
363 m_copydata(m, off, len, cp)
364 	register struct mbuf *m;
365 	register int off;
366 	register int len;
367 	caddr_t cp;
368 {
369 	register unsigned count;
370 
371 	if (off < 0 || len < 0)
372 		panic("m_copydata");
373 	while (off > 0) {
374 		if (m == 0)
375 			panic("m_copydata");
376 		if (off < m->m_len)
377 			break;
378 		off -= m->m_len;
379 		m = m->m_next;
380 	}
381 	while (len > 0) {
382 		if (m == 0)
383 			panic("m_copydata");
384 		count = min(m->m_len - off, len);
385 		bcopy(mtod(m, caddr_t) + off, cp, count);
386 		len -= count;
387 		cp += count;
388 		off = 0;
389 		m = m->m_next;
390 	}
391 }
392 
393 /*
394  * Concatenate mbuf chain n to m.
395  * Both chains must be of the same type (e.g. MT_DATA).
396  * Any m_pkthdr is not updated.
397  */
398 void
399 m_cat(m, n)
400 	register struct mbuf *m, *n;
401 {
402 	while (m->m_next)
403 		m = m->m_next;
404 	while (n) {
405 		if (m->m_flags & M_EXT ||
406 		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
407 			/* just join the two chains */
408 			m->m_next = n;
409 			return;
410 		}
411 		/* splat the data from one into the other */
412 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
413 		    (u_int)n->m_len);
414 		m->m_len += n->m_len;
415 		n = m_free(n);
416 	}
417 }
418 
419 void
420 m_adj(mp, req_len)
421 	struct mbuf *mp;
422 	int req_len;
423 {
424 	register int len = req_len;
425 	register struct mbuf *m;
426 	register count;
427 
428 	if ((m = mp) == NULL)
429 		return;
430 	if (len >= 0) {
431 		/*
432 		 * Trim from head.
433 		 */
434 		while (m != NULL && len > 0) {
435 			if (m->m_len <= len) {
436 				len -= m->m_len;
437 				m->m_len = 0;
438 				m = m->m_next;
439 			} else {
440 				m->m_len -= len;
441 				m->m_data += len;
442 				len = 0;
443 			}
444 		}
445 		m = mp;
446 		if (mp->m_flags & M_PKTHDR)
447 			m->m_pkthdr.len -= (req_len - len);
448 	} else {
449 		/*
450 		 * Trim from tail.  Scan the mbuf chain,
451 		 * calculating its length and finding the last mbuf.
452 		 * If the adjustment only affects this mbuf, then just
453 		 * adjust and return.  Otherwise, rescan and truncate
454 		 * after the remaining size.
455 		 */
456 		len = -len;
457 		count = 0;
458 		for (;;) {
459 			count += m->m_len;
460 			if (m->m_next == (struct mbuf *)0)
461 				break;
462 			m = m->m_next;
463 		}
464 		if (m->m_len >= len) {
465 			m->m_len -= len;
466 			if (mp->m_flags & M_PKTHDR)
467 				mp->m_pkthdr.len -= len;
468 			return;
469 		}
470 		count -= len;
471 		if (count < 0)
472 			count = 0;
473 		/*
474 		 * Correct length for chain is "count".
475 		 * Find the mbuf with last data, adjust its length,
476 		 * and toss data from remaining mbufs on chain.
477 		 */
478 		m = mp;
479 		if (m->m_flags & M_PKTHDR)
480 			m->m_pkthdr.len = count;
481 		for (; m; m = m->m_next) {
482 			if (m->m_len >= count) {
483 				m->m_len = count;
484 				break;
485 			}
486 			count -= m->m_len;
487 		}
488 		while (m->m_next)
489 			(m = m->m_next) ->m_len = 0;
490 	}
491 }
492 
493 /*
494  * Rearange an mbuf chain so that len bytes are contiguous
495  * and in the data area of an mbuf (so that mtod and dtom
496  * will work for a structure of size len).  Returns the resulting
497  * mbuf chain on success, frees it and returns null on failure.
498  * If there is room, it will add up to max_protohdr-len extra bytes to the
499  * contiguous region in an attempt to avoid being called next time.
500  */
501 int MPFail;
502 
503 struct mbuf *
504 m_pullup(n, len)
505 	register struct mbuf *n;
506 	int len;
507 {
508 	register struct mbuf *m;
509 	register int count;
510 	int space;
511 
512 	/*
513 	 * If first mbuf has no cluster, and has room for len bytes
514 	 * without shifting current data, pullup into it,
515 	 * otherwise allocate a new mbuf to prepend to the chain.
516 	 */
517 	if ((n->m_flags & M_EXT) == 0 &&
518 	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
519 		if (n->m_len >= len)
520 			return (n);
521 		m = n;
522 		n = n->m_next;
523 		len -= m->m_len;
524 	} else {
525 		if (len > MHLEN)
526 			goto bad;
527 		MGET(m, M_DONTWAIT, n->m_type);
528 		if (m == 0)
529 			goto bad;
530 		m->m_len = 0;
531 		if (n->m_flags & M_PKTHDR) {
532 			M_COPY_PKTHDR(m, n);
533 			n->m_flags &= ~M_PKTHDR;
534 		}
535 	}
536 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
537 	do {
538 		count = min(min(max(len, max_protohdr), space), n->m_len);
539 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
540 		  (unsigned)count);
541 		len -= count;
542 		m->m_len += count;
543 		n->m_len -= count;
544 		space -= count;
545 		if (n->m_len)
546 			n->m_data += count;
547 		else
548 			n = m_free(n);
549 	} while (len > 0 && n);
550 	if (len > 0) {
551 		(void) m_free(m);
552 		goto bad;
553 	}
554 	m->m_next = n;
555 	return (m);
556 bad:
557 	m_freem(n);
558 	MPFail++;
559 	return (0);
560 }
561 
562 /*
563  * Partition an mbuf chain in two pieces, returning the tail --
564  * all but the first len0 bytes.  In case of failure, it returns NULL and
565  * attempts to restore the chain to its original state.
566  */
567 struct mbuf *
568 m_split(m0, len0, wait)
569 	register struct mbuf *m0;
570 	int len0, wait;
571 {
572 	register struct mbuf *m, *n;
573 	unsigned len = len0, remain;
574 
575 	for (m = m0; m && len > m->m_len; m = m->m_next)
576 		len -= m->m_len;
577 	if (m == 0)
578 		return (0);
579 	remain = m->m_len - len;
580 	if (m0->m_flags & M_PKTHDR) {
581 		MGETHDR(n, wait, m0->m_type);
582 		if (n == 0)
583 			return (0);
584 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
585 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
586 		m0->m_pkthdr.len = len0;
587 		if (m->m_flags & M_EXT)
588 			goto extpacket;
589 		if (remain > MHLEN) {
590 			/* m can't be the lead packet */
591 			MH_ALIGN(n, 0);
592 			n->m_next = m_split(m, len, wait);
593 			if (n->m_next == 0) {
594 				(void) m_free(n);
595 				return (0);
596 			} else
597 				return (n);
598 		} else
599 			MH_ALIGN(n, remain);
600 	} else if (remain == 0) {
601 		n = m->m_next;
602 		m->m_next = 0;
603 		return (n);
604 	} else {
605 		MGET(n, wait, m->m_type);
606 		if (n == 0)
607 			return (0);
608 		M_ALIGN(n, remain);
609 	}
610 extpacket:
611 	if (m->m_flags & M_EXT) {
612 		n->m_flags |= M_EXT;
613 		n->m_ext = m->m_ext;
614 		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
615 		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
616 		n->m_data = m->m_data + len;
617 	} else {
618 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
619 	}
620 	n->m_len = remain;
621 	m->m_len = len;
622 	n->m_next = m->m_next;
623 	m->m_next = 0;
624 	return (n);
625 }
626 /*
627  * Routine to copy from device local memory into mbufs.
628  */
629 struct mbuf *
630 m_devget(buf, totlen, off0, ifp, copy)
631 	char *buf;
632 	int totlen, off0;
633 	struct ifnet *ifp;
634 	void (*copy)();
635 {
636 	register struct mbuf *m;
637 	struct mbuf *top = 0, **mp = &top;
638 	register int off = off0, len;
639 	register char *cp;
640 	char *epkt;
641 
642 	cp = buf;
643 	epkt = cp + totlen;
644 	if (off) {
645 		cp += off + 2 * sizeof(u_short);
646 		totlen -= 2 * sizeof(u_short);
647 	}
648 	MGETHDR(m, M_DONTWAIT, MT_DATA);
649 	if (m == 0)
650 		return (0);
651 	m->m_pkthdr.rcvif = ifp;
652 	m->m_pkthdr.len = totlen;
653 	m->m_len = MHLEN;
654 
655 	while (totlen > 0) {
656 		if (top) {
657 			MGET(m, M_DONTWAIT, MT_DATA);
658 			if (m == 0) {
659 				m_freem(top);
660 				return (0);
661 			}
662 			m->m_len = MLEN;
663 		}
664 		len = min(totlen, epkt - cp);
665 		if (len >= MINCLSIZE) {
666 			MCLGET(m, M_DONTWAIT);
667 			if (m->m_flags & M_EXT)
668 				m->m_len = len = min(len, MCLBYTES);
669 			else
670 				len = m->m_len;
671 		} else {
672 			/*
673 			 * Place initial small packet/header at end of mbuf.
674 			 */
675 			if (len < m->m_len) {
676 				if (top == 0 && len + max_linkhdr <= m->m_len)
677 					m->m_data += max_linkhdr;
678 				m->m_len = len;
679 			} else
680 				len = m->m_len;
681 		}
682 		if (copy)
683 			copy(cp, mtod(m, caddr_t), (unsigned)len);
684 		else
685 			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
686 		cp += len;
687 		*mp = m;
688 		mp = &m->m_next;
689 		totlen -= len;
690 		if (cp == epkt)
691 			cp = buf;
692 	}
693 	return (top);
694 }
695 
696 /*
697  * Copy data from a buffer back into the indicated mbuf chain,
698  * starting "off" bytes from the beginning, extending the mbuf
699  * chain if necessary.
700  */
701 void
702 m_copyback(m0, off, len, cp)
703 	struct	mbuf *m0;
704 	register int off;
705 	register int len;
706 	caddr_t cp;
707 {
708 	register int mlen;
709 	register struct mbuf *m = m0, *n;
710 	int totlen = 0;
711 
712 	if (m0 == 0)
713 		return;
714 	while (off > (mlen = m->m_len)) {
715 		off -= mlen;
716 		totlen += mlen;
717 		if (m->m_next == 0) {
718 			n = m_getclr(M_DONTWAIT, m->m_type);
719 			if (n == 0)
720 				goto out;
721 			n->m_len = min(MLEN, len + off);
722 			m->m_next = n;
723 		}
724 		m = m->m_next;
725 	}
726 	while (len > 0) {
727 		mlen = min (m->m_len - off, len);
728 		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
729 		cp += mlen;
730 		len -= mlen;
731 		mlen += off;
732 		off = 0;
733 		totlen += mlen;
734 		if (len == 0)
735 			break;
736 		if (m->m_next == 0) {
737 			n = m_get(M_DONTWAIT, m->m_type);
738 			if (n == 0)
739 				break;
740 			n->m_len = min(MLEN, len);
741 			m->m_next = n;
742 		}
743 		m = m->m_next;
744 	}
745 out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
746 		m->m_pkthdr.len = totlen;
747 }
748