xref: /freebsd/sys/kern/uipc_mbuf.c (revision ef5d438ed4bc17ad7ece3e40fe4d1f9baf3aadf7)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34  * $Id: uipc_mbuf.c,v 1.16 1995/12/07 12:46:59 davidg Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/malloc.h>
41 #define MBTYPES
42 #include <sys/mbuf.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45 #include <sys/domain.h>
46 #include <sys/protosw.h>
47 
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_kern.h>
51 #include <vm/vm_extern.h>
52 
53 static void mbinit __P((void *));
54 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
55 
56 struct mbuf *mbutl;
57 char	*mclrefcnt;
58 struct mbstat mbstat;
59 union mcluster *mclfree;
60 int	max_linkhdr;
61 int	max_protohdr;
62 int	max_hdr;
63 int	max_datalen;
64 
65 static void	m_reclaim __P((void));
66 
67 /* ARGSUSED*/
68 static void
69 mbinit(dummy)
70 	void *dummy;
71 {
72 	int s;
73 
74 #if CLBYTES < 4096
75 #define NCL_INIT	(4096/CLBYTES)
76 #else
77 #define NCL_INIT	1
78 #endif
79 	s = splimp();
80 	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
81 		goto bad;
82 	splx(s);
83 	return;
84 bad:
85 	panic("mbinit");
86 }
87 
88 /*
89  * Allocate some number of mbuf clusters
90  * and place on cluster free list.
91  * Must be called at splimp.
92  */
93 /* ARGSUSED */
94 int
95 m_clalloc(ncl, nowait)
96 	register int ncl;
97 	int nowait;
98 {
99 	register caddr_t p;
100 	register int i;
101 	int npg;
102 
103 	/*
104 	 * Once we run out of map space, it will be impossible
105 	 * to get any more (nothing is ever freed back to the
106 	 * map).
107 	 */
108 	if (mb_map_full)
109 		return (0);
110 
111 	npg = ncl * CLSIZE;
112 	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
113 				 nowait ? M_NOWAIT : M_WAITOK);
114 	/*
115 	 * Either the map is now full, or this is nowait and there
116 	 * are no pages left.
117 	 */
118 	if (p == NULL)
119 		return (0);
120 
121 	ncl = ncl * CLBYTES / MCLBYTES;
122 	for (i = 0; i < ncl; i++) {
123 		((union mcluster *)p)->mcl_next = mclfree;
124 		mclfree = (union mcluster *)p;
125 		p += MCLBYTES;
126 		mbstat.m_clfree++;
127 	}
128 	mbstat.m_clusters += ncl;
129 	return (1);
130 }
131 
132 /*
133  * When MGET failes, ask protocols to free space when short of memory,
134  * then re-attempt to allocate an mbuf.
135  */
136 struct mbuf *
137 m_retry(i, t)
138 	int i, t;
139 {
140 	register struct mbuf *m;
141 
142 	m_reclaim();
143 #define m_retry(i, t)	(struct mbuf *)0
144 	MGET(m, i, t);
145 #undef m_retry
146 	if (m != NULL)
147 		mbstat.m_wait++;
148 	else
149 		mbstat.m_drops++;
150 	return (m);
151 }
152 
153 /*
154  * As above; retry an MGETHDR.
155  */
156 struct mbuf *
157 m_retryhdr(i, t)
158 	int i, t;
159 {
160 	register struct mbuf *m;
161 
162 	m_reclaim();
163 #define m_retryhdr(i, t) (struct mbuf *)0
164 	MGETHDR(m, i, t);
165 #undef m_retryhdr
166 	if (m != NULL)
167 		mbstat.m_wait++;
168 	else
169 		mbstat.m_drops++;
170 	return (m);
171 }
172 
173 static void
174 m_reclaim()
175 {
176 	register struct domain *dp;
177 	register struct protosw *pr;
178 	int s = splimp();
179 
180 	for (dp = domains; dp; dp = dp->dom_next)
181 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
182 			if (pr->pr_drain)
183 				(*pr->pr_drain)();
184 	splx(s);
185 	mbstat.m_drain++;
186 }
187 
188 /*
189  * Space allocation routines.
190  * These are also available as macros
191  * for critical paths.
192  */
/*
 * Function wrapper around the MGET macro.
 * Returns whatever MGET leaves in its first argument: the newly
 * allocated mbuf, or a null pointer.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGET(mb, nowait, type);
	return (mb);
}
202 
/*
 * Function wrapper around the MGETHDR macro.
 * Returns whatever MGETHDR leaves in its first argument: the newly
 * allocated mbuf, or a null pointer.
 */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
212 
213 struct mbuf *
214 m_getclr(nowait, type)
215 	int nowait, type;
216 {
217 	register struct mbuf *m;
218 
219 	MGET(m, nowait, type);
220 	if (m == 0)
221 		return (0);
222 	bzero(mtod(m, caddr_t), MLEN);
223 	return (m);
224 }
225 
/*
 * Function wrapper around the MFREE macro: release the single mbuf
 * "m" and return the value MFREE stores in its second argument
 * (the mbuf that followed "m" in its chain).
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *rest;

	MFREE(m, rest);
	return (rest);
}
235 
236 void
237 m_freem(m)
238 	register struct mbuf *m;
239 {
240 	register struct mbuf *n;
241 
242 	if (m == NULL)
243 		return;
244 	do {
245 		MFREE(m, n);
246 		m = n;
247 	} while (m);
248 }
249 
250 /*
251  * Mbuffer utility routines.
252  */
253 
254 /*
255  * Lesser-used path for M_PREPEND:
256  * allocate new mbuf to prepend to chain,
257  * copy junk along.
258  */
259 struct mbuf *
260 m_prepend(m, len, how)
261 	register struct mbuf *m;
262 	int len, how;
263 {
264 	struct mbuf *mn;
265 
266 	MGET(mn, how, m->m_type);
267 	if (mn == (struct mbuf *)NULL) {
268 		m_freem(m);
269 		return ((struct mbuf *)NULL);
270 	}
271 	if (m->m_flags & M_PKTHDR) {
272 		M_COPY_PKTHDR(mn, m);
273 		m->m_flags &= ~M_PKTHDR;
274 	}
275 	mn->m_next = m;
276 	m = mn;
277 	if (len < MHLEN)
278 		MH_ALIGN(m, len);
279 	m->m_len = len;
280 	return (m);
281 }
282 
283 /*
284  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
285  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
286  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
287  */
288 static int MCFail;
289 
290 struct mbuf *
291 m_copym(m, off0, len, wait)
292 	register struct mbuf *m;
293 	int off0, wait;
294 	register int len;
295 {
296 	register struct mbuf *n, **np;
297 	register int off = off0;
298 	struct mbuf *top;
299 	int copyhdr = 0;
300 
301 	if (off < 0 || len < 0)
302 		panic("m_copym");
303 	if (off == 0 && m->m_flags & M_PKTHDR)
304 		copyhdr = 1;
305 	while (off > 0) {
306 		if (m == 0)
307 			panic("m_copym");
308 		if (off < m->m_len)
309 			break;
310 		off -= m->m_len;
311 		m = m->m_next;
312 	}
313 	np = &top;
314 	top = 0;
315 	while (len > 0) {
316 		if (m == 0) {
317 			if (len != M_COPYALL)
318 				panic("m_copym");
319 			break;
320 		}
321 		MGET(n, wait, m->m_type);
322 		*np = n;
323 		if (n == 0)
324 			goto nospace;
325 		if (copyhdr) {
326 			M_COPY_PKTHDR(n, m);
327 			if (len == M_COPYALL)
328 				n->m_pkthdr.len -= off0;
329 			else
330 				n->m_pkthdr.len = len;
331 			copyhdr = 0;
332 		}
333 		n->m_len = min(len, m->m_len - off);
334 		if (m->m_flags & M_EXT) {
335 			n->m_data = m->m_data + off;
336 			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
337 			n->m_ext = m->m_ext;
338 			n->m_flags |= M_EXT;
339 		} else
340 			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
341 			    (unsigned)n->m_len);
342 		if (len != M_COPYALL)
343 			len -= n->m_len;
344 		off = 0;
345 		m = m->m_next;
346 		np = &n->m_next;
347 	}
348 	if (top == 0)
349 		MCFail++;
350 	return (top);
351 nospace:
352 	m_freem(top);
353 	MCFail++;
354 	return (0);
355 }
356 
357 /*
358  * Copy data from an mbuf chain starting "off" bytes from the beginning,
359  * continuing for "len" bytes, into the indicated buffer.
360  */
361 void
362 m_copydata(m, off, len, cp)
363 	register struct mbuf *m;
364 	register int off;
365 	register int len;
366 	caddr_t cp;
367 {
368 	register unsigned count;
369 
370 	if (off < 0 || len < 0)
371 		panic("m_copydata");
372 	while (off > 0) {
373 		if (m == 0)
374 			panic("m_copydata");
375 		if (off < m->m_len)
376 			break;
377 		off -= m->m_len;
378 		m = m->m_next;
379 	}
380 	while (len > 0) {
381 		if (m == 0)
382 			panic("m_copydata");
383 		count = min(m->m_len - off, len);
384 		bcopy(mtod(m, caddr_t) + off, cp, count);
385 		len -= count;
386 		cp += count;
387 		off = 0;
388 		m = m->m_next;
389 	}
390 }
391 
392 /*
393  * Concatenate mbuf chain n to m.
394  * Both chains must be of the same type (e.g. MT_DATA).
395  * Any m_pkthdr is not updated.
396  */
397 void
398 m_cat(m, n)
399 	register struct mbuf *m, *n;
400 {
401 	while (m->m_next)
402 		m = m->m_next;
403 	while (n) {
404 		if (m->m_flags & M_EXT ||
405 		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
406 			/* just join the two chains */
407 			m->m_next = n;
408 			return;
409 		}
410 		/* splat the data from one into the other */
411 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
412 		    (u_int)n->m_len);
413 		m->m_len += n->m_len;
414 		n = m_free(n);
415 	}
416 }
417 
418 void
419 m_adj(mp, req_len)
420 	struct mbuf *mp;
421 	int req_len;
422 {
423 	register int len = req_len;
424 	register struct mbuf *m;
425 	register count;
426 
427 	if ((m = mp) == NULL)
428 		return;
429 	if (len >= 0) {
430 		/*
431 		 * Trim from head.
432 		 */
433 		while (m != NULL && len > 0) {
434 			if (m->m_len <= len) {
435 				len -= m->m_len;
436 				m->m_len = 0;
437 				m = m->m_next;
438 			} else {
439 				m->m_len -= len;
440 				m->m_data += len;
441 				len = 0;
442 			}
443 		}
444 		m = mp;
445 		if (mp->m_flags & M_PKTHDR)
446 			m->m_pkthdr.len -= (req_len - len);
447 	} else {
448 		/*
449 		 * Trim from tail.  Scan the mbuf chain,
450 		 * calculating its length and finding the last mbuf.
451 		 * If the adjustment only affects this mbuf, then just
452 		 * adjust and return.  Otherwise, rescan and truncate
453 		 * after the remaining size.
454 		 */
455 		len = -len;
456 		count = 0;
457 		for (;;) {
458 			count += m->m_len;
459 			if (m->m_next == (struct mbuf *)0)
460 				break;
461 			m = m->m_next;
462 		}
463 		if (m->m_len >= len) {
464 			m->m_len -= len;
465 			if (mp->m_flags & M_PKTHDR)
466 				mp->m_pkthdr.len -= len;
467 			return;
468 		}
469 		count -= len;
470 		if (count < 0)
471 			count = 0;
472 		/*
473 		 * Correct length for chain is "count".
474 		 * Find the mbuf with last data, adjust its length,
475 		 * and toss data from remaining mbufs on chain.
476 		 */
477 		m = mp;
478 		if (m->m_flags & M_PKTHDR)
479 			m->m_pkthdr.len = count;
480 		for (; m; m = m->m_next) {
481 			if (m->m_len >= count) {
482 				m->m_len = count;
483 				break;
484 			}
485 			count -= m->m_len;
486 		}
487 		while (m->m_next)
488 			(m = m->m_next) ->m_len = 0;
489 	}
490 }
491 
492 /*
493  * Rearange an mbuf chain so that len bytes are contiguous
494  * and in the data area of an mbuf (so that mtod and dtom
495  * will work for a structure of size len).  Returns the resulting
496  * mbuf chain on success, frees it and returns null on failure.
497  * If there is room, it will add up to max_protohdr-len extra bytes to the
498  * contiguous region in an attempt to avoid being called next time.
499  */
500 static int MPFail;
501 
502 struct mbuf *
503 m_pullup(n, len)
504 	register struct mbuf *n;
505 	int len;
506 {
507 	register struct mbuf *m;
508 	register int count;
509 	int space;
510 
511 	/*
512 	 * If first mbuf has no cluster, and has room for len bytes
513 	 * without shifting current data, pullup into it,
514 	 * otherwise allocate a new mbuf to prepend to the chain.
515 	 */
516 	if ((n->m_flags & M_EXT) == 0 &&
517 	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
518 		if (n->m_len >= len)
519 			return (n);
520 		m = n;
521 		n = n->m_next;
522 		len -= m->m_len;
523 	} else {
524 		if (len > MHLEN)
525 			goto bad;
526 		MGET(m, M_DONTWAIT, n->m_type);
527 		if (m == 0)
528 			goto bad;
529 		m->m_len = 0;
530 		if (n->m_flags & M_PKTHDR) {
531 			M_COPY_PKTHDR(m, n);
532 			n->m_flags &= ~M_PKTHDR;
533 		}
534 	}
535 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
536 	do {
537 		count = min(min(max(len, max_protohdr), space), n->m_len);
538 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
539 		  (unsigned)count);
540 		len -= count;
541 		m->m_len += count;
542 		n->m_len -= count;
543 		space -= count;
544 		if (n->m_len)
545 			n->m_data += count;
546 		else
547 			n = m_free(n);
548 	} while (len > 0 && n);
549 	if (len > 0) {
550 		(void) m_free(m);
551 		goto bad;
552 	}
553 	m->m_next = n;
554 	return (m);
555 bad:
556 	m_freem(n);
557 	MPFail++;
558 	return (0);
559 }
560 
561 /*
562  * Partition an mbuf chain in two pieces, returning the tail --
563  * all but the first len0 bytes.  In case of failure, it returns NULL and
564  * attempts to restore the chain to its original state.
565  */
566 struct mbuf *
567 m_split(m0, len0, wait)
568 	register struct mbuf *m0;
569 	int len0, wait;
570 {
571 	register struct mbuf *m, *n;
572 	unsigned len = len0, remain;
573 
574 	for (m = m0; m && len > m->m_len; m = m->m_next)
575 		len -= m->m_len;
576 	if (m == 0)
577 		return (0);
578 	remain = m->m_len - len;
579 	if (m0->m_flags & M_PKTHDR) {
580 		MGETHDR(n, wait, m0->m_type);
581 		if (n == 0)
582 			return (0);
583 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
584 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
585 		m0->m_pkthdr.len = len0;
586 		if (m->m_flags & M_EXT)
587 			goto extpacket;
588 		if (remain > MHLEN) {
589 			/* m can't be the lead packet */
590 			MH_ALIGN(n, 0);
591 			n->m_next = m_split(m, len, wait);
592 			if (n->m_next == 0) {
593 				(void) m_free(n);
594 				return (0);
595 			} else
596 				return (n);
597 		} else
598 			MH_ALIGN(n, remain);
599 	} else if (remain == 0) {
600 		n = m->m_next;
601 		m->m_next = 0;
602 		return (n);
603 	} else {
604 		MGET(n, wait, m->m_type);
605 		if (n == 0)
606 			return (0);
607 		M_ALIGN(n, remain);
608 	}
609 extpacket:
610 	if (m->m_flags & M_EXT) {
611 		n->m_flags |= M_EXT;
612 		n->m_ext = m->m_ext;
613 		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
614 		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
615 		n->m_data = m->m_data + len;
616 	} else {
617 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
618 	}
619 	n->m_len = remain;
620 	m->m_len = len;
621 	n->m_next = m->m_next;
622 	m->m_next = 0;
623 	return (n);
624 }
625 /*
626  * Routine to copy from device local memory into mbufs.
627  */
628 struct mbuf *
629 m_devget(buf, totlen, off0, ifp, copy)
630 	char *buf;
631 	int totlen, off0;
632 	struct ifnet *ifp;
633 	void (*copy) __P((char *from, caddr_t to, u_int len));
634 {
635 	register struct mbuf *m;
636 	struct mbuf *top = 0, **mp = &top;
637 	register int off = off0, len;
638 	register char *cp;
639 	char *epkt;
640 
641 	cp = buf;
642 	epkt = cp + totlen;
643 	if (off) {
644 		cp += off + 2 * sizeof(u_short);
645 		totlen -= 2 * sizeof(u_short);
646 	}
647 	MGETHDR(m, M_DONTWAIT, MT_DATA);
648 	if (m == 0)
649 		return (0);
650 	m->m_pkthdr.rcvif = ifp;
651 	m->m_pkthdr.len = totlen;
652 	m->m_len = MHLEN;
653 
654 	while (totlen > 0) {
655 		if (top) {
656 			MGET(m, M_DONTWAIT, MT_DATA);
657 			if (m == 0) {
658 				m_freem(top);
659 				return (0);
660 			}
661 			m->m_len = MLEN;
662 		}
663 		len = min(totlen, epkt - cp);
664 		if (len >= MINCLSIZE) {
665 			MCLGET(m, M_DONTWAIT);
666 			if (m->m_flags & M_EXT)
667 				m->m_len = len = min(len, MCLBYTES);
668 			else
669 				len = m->m_len;
670 		} else {
671 			/*
672 			 * Place initial small packet/header at end of mbuf.
673 			 */
674 			if (len < m->m_len) {
675 				if (top == 0 && len + max_linkhdr <= m->m_len)
676 					m->m_data += max_linkhdr;
677 				m->m_len = len;
678 			} else
679 				len = m->m_len;
680 		}
681 		if (copy)
682 			copy(cp, mtod(m, caddr_t), (unsigned)len);
683 		else
684 			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
685 		cp += len;
686 		*mp = m;
687 		mp = &m->m_next;
688 		totlen -= len;
689 		if (cp == epkt)
690 			cp = buf;
691 	}
692 	return (top);
693 }
694 
695 /*
696  * Copy data from a buffer back into the indicated mbuf chain,
697  * starting "off" bytes from the beginning, extending the mbuf
698  * chain if necessary.
699  */
700 void
701 m_copyback(m0, off, len, cp)
702 	struct	mbuf *m0;
703 	register int off;
704 	register int len;
705 	caddr_t cp;
706 {
707 	register int mlen;
708 	register struct mbuf *m = m0, *n;
709 	int totlen = 0;
710 
711 	if (m0 == 0)
712 		return;
713 	while (off > (mlen = m->m_len)) {
714 		off -= mlen;
715 		totlen += mlen;
716 		if (m->m_next == 0) {
717 			n = m_getclr(M_DONTWAIT, m->m_type);
718 			if (n == 0)
719 				goto out;
720 			n->m_len = min(MLEN, len + off);
721 			m->m_next = n;
722 		}
723 		m = m->m_next;
724 	}
725 	while (len > 0) {
726 		mlen = min (m->m_len - off, len);
727 		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
728 		cp += mlen;
729 		len -= mlen;
730 		mlen += off;
731 		off = 0;
732 		totlen += mlen;
733 		if (len == 0)
734 			break;
735 		if (m->m_next == 0) {
736 			n = m_get(M_DONTWAIT, m->m_type);
737 			if (n == 0)
738 				break;
739 			n->m_len = min(MLEN, len);
740 			m->m_next = n;
741 		}
742 		m = m->m_next;
743 	}
744 out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
745 		m->m_pkthdr.len = totlen;
746 }
747