xref: /freebsd/sys/kern/uipc_mbuf.c (revision 8e6b01171e30297084bb0b4457c4183c2746aacc)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34  * $Id: uipc_mbuf.c,v 1.11 1995/08/28 09:18:52 julian Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/malloc.h>
41 #define MBTYPES
42 #include <sys/mbuf.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45 #include <sys/domain.h>
46 #include <sys/protosw.h>
47 
48 #include <vm/vm.h>
49 #include <vm/vm_kern.h>
50 
51 /*
52  * System initialization
53  */
54 
static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)	/* run mbinit during mbuf-subsystem startup */


struct mbuf *mbutl;		/* NOTE(review): not referenced in this file -- presumably base of the mbuf arena; confirm where it is set */
char	*mclrefcnt;		/* per-cluster reference counts, indexed via mtocl() (incremented by m_copym/m_split when sharing clusters) */
struct mbstat mbstat;		/* statistics: m_clfree/m_clusters/m_wait/m_drops/m_drain updated in this file */
union mcluster *mclfree;	/* head of singly linked cluster free list, filled by m_clalloc() */
int	max_linkhdr;		/* largest link-level header; m_devget() reserves this much leading space */
int	max_protohdr;		/* largest protocol header; m_pullup() gathers up to this many extra bytes */
int	max_hdr;		/* NOTE(review): unused here -- presumably max_linkhdr + max_protohdr; confirm */
int	max_datalen;		/* NOTE(review): unused here -- presumably MHLEN - max_hdr; confirm */
67 
68 /* ARGSUSED*/
69 static void
70 mbinit(udata)
71 	void *udata;		/* not used*/
72 {
73 	int s;
74 
75 #if CLBYTES < 4096
76 #define NCL_INIT	(4096/CLBYTES)
77 #else
78 #define NCL_INIT	1
79 #endif
80 	s = splimp();
81 	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
82 		goto bad;
83 	splx(s);
84 	return;
85 bad:
86 	panic("mbinit");
87 }
88 
89 /*
90  * Allocate some number of mbuf clusters
91  * and place on cluster free list.
92  * Must be called at splimp.
93  */
94 /* ARGSUSED */
int
m_clalloc(ncl, nowait)
	register int ncl;
	int nowait;
{
	static int logged;	/* NOTE(review): never set or read in this file -- appears vestigial */
	register caddr_t p;
	register int i;
	int npg;

	/*
	 * Once we run out of map space, it will be impossible
	 * to get any more (nothing is ever freed back to the
	 * map).
	 */
	if (mb_map_full)
		return (0);

	npg = ncl * CLSIZE;
	/* Grab whole pages from the mbuf submap; may sleep unless nowait. */
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
				 nowait ? M_NOWAIT : M_WAITOK);
	/*
	 * Either the map is now full, or this is nowait and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	/*
	 * Recompute the count in MCLBYTES-sized clusters, then carve
	 * the allocation up and push each cluster onto the free list.
	 */
	ncl = ncl * CLBYTES / MCLBYTES;
	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree;
		mclfree = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}
133 
134 /*
 * When MGET fails, ask protocols to free space when short of memory,
136  * then re-attempt to allocate an mbuf.
137  */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	/* Ask the protocols to release whatever mbufs they can spare. */
	m_reclaim();
	/*
	 * Temporarily make m_retry expand to NULL so the MGET below
	 * cannot recurse back into this function if it fails again.
	 */
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;	/* succeeded after reclaim */
	else
		mbstat.m_drops++;	/* still out of memory; caller gets NULL */
	return (m);
}
154 
155 /*
156  * As above; retry an MGETHDR.
157  */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	register struct mbuf *m;

	/* Ask the protocols to release whatever mbufs they can spare. */
	m_reclaim();
	/*
	 * Temporarily make m_retryhdr expand to NULL so the MGETHDR
	 * below cannot recurse back into this function on failure.
	 */
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;	/* succeeded after reclaim */
	else
		mbstat.m_drops++;	/* still out of memory; caller gets NULL */
	return (m);
}
174 
175 void
176 m_reclaim()
177 {
178 	register struct domain *dp;
179 	register struct protosw *pr;
180 	int s = splimp();
181 
182 	for (dp = domains; dp; dp = dp->dom_next)
183 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
184 			if (pr->pr_drain)
185 				(*pr->pr_drain)();
186 	splx(s);
187 	mbstat.m_drain++;
188 }
189 
190 /*
191  * Space allocation routines.
192  * These are also available as macros
193  * for critical paths.
194  */
/*
 * Allocate one mbuf of the given type.  `nowait' selects the
 * M_WAIT/M_DONTWAIT allocation policy.  Returns NULL on failure.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGET(mb, nowait, type);
	return (mb);
}
204 
/*
 * Allocate one packet-header mbuf of the given type.  `nowait'
 * selects the allocation policy.  Returns NULL on failure.
 */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
214 
215 struct mbuf *
216 m_getclr(nowait, type)
217 	int nowait, type;
218 {
219 	register struct mbuf *m;
220 
221 	MGET(m, nowait, type);
222 	if (m == 0)
223 		return (0);
224 	bzero(mtod(m, caddr_t), MLEN);
225 	return (m);
226 }
227 
/*
 * Free a single mbuf and return its successor in the chain
 * (NULL if it was the last one).
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *nextp;

	MFREE(m, nextp);
	return (nextp);
}
237 
238 void
239 m_freem(m)
240 	register struct mbuf *m;
241 {
242 	register struct mbuf *n;
243 
244 	if (m == NULL)
245 		return;
246 	do {
247 		MFREE(m, n);
248 		m = n;
249 	} while (m);
250 }
251 
252 /*
253  * Mbuffer utility routines.
254  */
255 
256 /*
257  * Lesser-used path for M_PREPEND:
258  * allocate new mbuf to prepend to chain,
259  * copy junk along.
260  */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		/* Allocation failed: the whole original chain is freed. */
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		/* Move the packet header into the new first mbuf. */
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);	/* put the len bytes at the tail of the data area */
	m->m_len = len;
	return (m);
}
284 
285 /*
286  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
287  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
288  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
289  */
int MCFail;		/* number of times m_copym() failed to allocate */
291 
struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;	/* copy starts at the head: duplicate the pkthdr too */
	/* Skip leading mbufs until `off' falls inside the current one. */
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			/* Running off the end is only legal for M_COPYALL. */
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			/* First mbuf of the copy carries the packet header. */
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/*
			 * Cluster data is shared by reference rather than
			 * copied: bump the cluster's reference count.
			 */
			n->m_data = m->m_data + off;
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;	/* subsequent mbufs are copied from their start */
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	/* Allocation failed mid-copy: release the partial copy. */
	m_freem(top);
	MCFail++;
	return (0);
}
358 
359 /*
360  * Copy data from an mbuf chain starting "off" bytes from the beginning,
361  * continuing for "len" bytes, into the indicated buffer.
362  */
363 void
364 m_copydata(m, off, len, cp)
365 	register struct mbuf *m;
366 	register int off;
367 	register int len;
368 	caddr_t cp;
369 {
370 	register unsigned count;
371 
372 	if (off < 0 || len < 0)
373 		panic("m_copydata");
374 	while (off > 0) {
375 		if (m == 0)
376 			panic("m_copydata");
377 		if (off < m->m_len)
378 			break;
379 		off -= m->m_len;
380 		m = m->m_next;
381 	}
382 	while (len > 0) {
383 		if (m == 0)
384 			panic("m_copydata");
385 		count = min(m->m_len - off, len);
386 		bcopy(mtod(m, caddr_t) + off, cp, count);
387 		len -= count;
388 		cp += count;
389 		off = 0;
390 		m = m->m_next;
391 	}
392 }
393 
394 /*
395  * Concatenate mbuf chain n to m.
396  * Both chains must be of the same type (e.g. MT_DATA).
397  * Any m_pkthdr is not updated.
398  */
399 void
400 m_cat(m, n)
401 	register struct mbuf *m, *n;
402 {
403 	while (m->m_next)
404 		m = m->m_next;
405 	while (n) {
406 		if (m->m_flags & M_EXT ||
407 		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
408 			/* just join the two chains */
409 			m->m_next = n;
410 			return;
411 		}
412 		/* splat the data from one into the other */
413 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
414 		    (u_int)n->m_len);
415 		m->m_len += n->m_len;
416 		n = m_free(n);
417 	}
418 }
419 
/*
 * Trim `req_len' bytes from the chain: from the head when
 * req_len is positive, from the tail when negative.  The
 * packet-header length (if any) is kept consistent.
 */
void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register count;		/* implicit int (K&R) */

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				/* Entire mbuf consumed; leave it empty. */
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				/* Partial trim within this mbuf. */
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			/* Trim fits entirely inside the last mbuf. */
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		/* Zero the lengths of any mbufs past the new end. */
		while (m->m_next)
			(m = m->m_next) ->m_len = 0;
	}
}
493 
494 /*
 * Rearrange an mbuf chain so that len bytes are contiguous
496  * and in the data area of an mbuf (so that mtod and dtom
497  * will work for a structure of size len).  Returns the resulting
498  * mbuf chain on success, frees it and returns null on failure.
499  * If there is room, it will add up to max_protohdr-len extra bytes to the
500  * contiguous region in an attempt to avoid being called next time.
501  */
int MPFail;		/* number of times m_pullup() failed */
503 
struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);	/* already contiguous */
		m = n;
		n = n->m_next;
		len -= m->m_len;	/* bytes still to be gathered */
	} else {
		if (len > MHLEN)
			goto bad;	/* cannot fit in a single mbuf */
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			/* Move the packet header to the new lead mbuf. */
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	/* Room remaining in m's data area. */
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/*
		 * Gather at least len bytes -- up to max_protohdr if
		 * space allows -- but never more than n currently holds.
		 */
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);	/* source mbuf drained; free it */
	} while (len > 0 && n);
	if (len > 0) {
		/* Chain ended before len bytes could be gathered. */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
562 
563 /*
564  * Partition an mbuf chain in two pieces, returning the tail --
565  * all but the first len0 bytes.  In case of failure, it returns NULL and
566  * attempts to restore the chain to its original state.
567  */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	/* Locate the mbuf containing the split point. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);	/* chain shorter than len0 */
	remain = m->m_len - len;	/* bytes of m belonging to the tail */
	if (m0->m_flags & M_PKTHDR) {
		/* Build a packet header for the tail half. */
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;	/* cluster data is shared, not copied */
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			/* Recurse to split m itself; n carries only the header. */
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* Split falls exactly on an mbuf boundary. */
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the cluster: bump its reference count. */
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
627 /*
628  * Routine to copy from device local memory into mbufs.
629  */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy)();
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;	/* one past the end of the device buffer */
	if (off) {
		/*
		 * Start copying past `off' plus two u_shorts.
		 * NOTE(review): presumably the 4.4BSD trailer-protocol
		 * layout -- confirm against the callers.
		 */
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			/* First (header) mbuf was allocated above. */
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			/* Big enough to warrant a cluster; fall back if none. */
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);	/* device-specific copy routine */
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;	/* wrap to the start of the device buffer */
	}
	return (top);
}
696 
697 /*
698  * Copy data from a buffer back into the indicated mbuf chain,
699  * starting "off" bytes from the beginning, extending the mbuf
700  * chain if necessary.
701  */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	/*
	 * Walk to the mbuf containing offset `off', appending zeroed
	 * mbufs whenever the chain runs out before the offset.
	 */
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;	/* best effort: stop extending silently */
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min (m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;	/* bytes of this mbuf now accounted for */
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			/* Extend the chain for the remaining bytes. */
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
	/* Grow the recorded packet length if the copy extended the chain. */
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
749