xref: /freebsd/sys/kern/uipc_mbuf.c (revision 0b87f79976047c8f4332bbf7dc03146f6b0de79f)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34  * $FreeBSD$
35  */
36 
37 #include "opt_param.h"
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/sysctl.h>
45 #include <sys/domain.h>
46 #include <sys/protosw.h>
47 
48 int	max_linkhdr;
49 int	max_protohdr;
50 int	max_hdr;
51 int	max_datalen;
52 
53 /*
54  * sysctl(8) exported objects
55  */
56 SYSCTL_DECL(_kern_ipc);
57 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
58 	   &max_linkhdr, 0, "");
59 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
60 	   &max_protohdr, 0, "");
61 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
62 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
63 	   &max_datalen, 0, "");
64 
65 void
66 m_freem(struct mbuf *m)
67 {
68 	while (m) {
69 		m = m_free(m);
70 	}
71 }
72 
73 /*
74  * Lesser-used path for M_PREPEND:
75  * allocate new mbuf to prepend to chain,
76  * copy junk along.
77  */
78 struct mbuf *
79 m_prepend(struct mbuf *m, int len, int how)
80 {
81 	struct mbuf *mn;
82 
83 	MGET(mn, how, m->m_type);
84 	if (mn == NULL) {
85 		m_freem(m);
86 		return (NULL);
87 	}
88 	if (m->m_flags & M_PKTHDR) {
89 		M_COPY_PKTHDR(mn, m);
90 		m->m_flags &= ~M_PKTHDR;
91 	}
92 	mn->m_next = m;
93 	m = mn;
94 	if (len < MHLEN)
95 		MH_ALIGN(m, len);
96 	m->m_len = len;
97 	return (m);
98 }
99 
100 /*
101  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
102  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
103  * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
104  * Note that the copy is read-only, because clusters are not copied,
105  * only their reference counts are incremented.
106  */
107 struct mbuf *
108 m_copym(struct mbuf *m, int off0, int len, int wait)
109 {
110 	struct mbuf *n, **np;
111 	int off = off0;
112 	struct mbuf *top;
113 	int copyhdr = 0;
114 
115 	KASSERT(off >= 0, ("m_copym, negative off %d", off));
116 	KASSERT(len >= 0, ("m_copym, negative len %d", len));
117 	if (off == 0 && m->m_flags & M_PKTHDR)
118 		copyhdr = 1;
119 	while (off > 0) {
120 		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
121 		if (off < m->m_len)
122 			break;
123 		off -= m->m_len;
124 		m = m->m_next;
125 	}
126 	np = &top;
127 	top = 0;
128 	while (len > 0) {
129 		if (m == NULL) {
130 			KASSERT(len == M_COPYALL,
131 			    ("m_copym, length > size of mbuf chain"));
132 			break;
133 		}
134 		MGET(n, wait, m->m_type);
135 		*np = n;
136 		if (n == NULL)
137 			goto nospace;
138 		if (copyhdr) {
139 			M_COPY_PKTHDR(n, m);
140 			if (len == M_COPYALL)
141 				n->m_pkthdr.len -= off0;
142 			else
143 				n->m_pkthdr.len = len;
144 			copyhdr = 0;
145 		}
146 		n->m_len = min(len, m->m_len - off);
147 		if (m->m_flags & M_EXT) {
148 			n->m_data = m->m_data + off;
149 			n->m_ext = m->m_ext;
150 			n->m_flags |= M_EXT;
151 			MEXT_ADD_REF(m);
152 		} else
153 			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
154 			    (unsigned)n->m_len);
155 		if (len != M_COPYALL)
156 			len -= n->m_len;
157 		off = 0;
158 		m = m->m_next;
159 		np = &n->m_next;
160 	}
161 	if (top == NULL)
162 		mbstat.m_mcfail++;	/* XXX: No consistency. */
163 
164 	return (top);
165 nospace:
166 	m_freem(top);
167 	mbstat.m_mcfail++;	/* XXX: No consistency. */
168 	return (NULL);
169 }
170 
171 /*
172  * Copy an entire packet, including header (which must be present).
173  * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
174  * Note that the copy is read-only, because clusters are not copied,
175  * only their reference counts are incremented.
176  * Preserve alignment of the first mbuf so if the creator has left
177  * some room at the beginning (e.g. for inserting protocol headers)
178  * the copies still have the room available.
179  */
180 struct mbuf *
181 m_copypacket(struct mbuf *m, int how)
182 {
183 	struct mbuf *top, *n, *o;
184 
185 	MGET(n, how, m->m_type);
186 	top = n;
187 	if (n == NULL)
188 		goto nospace;
189 
190 	M_COPY_PKTHDR(n, m);
191 	n->m_len = m->m_len;
192 	if (m->m_flags & M_EXT) {
193 		n->m_data = m->m_data;
194 		n->m_ext = m->m_ext;
195 		n->m_flags |= M_EXT;
196 		MEXT_ADD_REF(m);
197 	} else {
198 		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
199 		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
200 	}
201 
202 	m = m->m_next;
203 	while (m) {
204 		MGET(o, how, m->m_type);
205 		if (o == NULL)
206 			goto nospace;
207 
208 		n->m_next = o;
209 		n = n->m_next;
210 
211 		n->m_len = m->m_len;
212 		if (m->m_flags & M_EXT) {
213 			n->m_data = m->m_data;
214 			n->m_ext = m->m_ext;
215 			n->m_flags |= M_EXT;
216 			MEXT_ADD_REF(m);
217 		} else {
218 			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
219 		}
220 
221 		m = m->m_next;
222 	}
223 	return top;
224 nospace:
225 	m_freem(top);
226 	mbstat.m_mcfail++;	/* XXX: No consistency. */
227 	return (NULL);
228 }
229 
230 /*
231  * Copy data from an mbuf chain starting "off" bytes from the beginning,
232  * continuing for "len" bytes, into the indicated buffer.
233  */
234 void
235 m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
236 {
237 	unsigned count;
238 
239 	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
240 	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
241 	while (off > 0) {
242 		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
243 		if (off < m->m_len)
244 			break;
245 		off -= m->m_len;
246 		m = m->m_next;
247 	}
248 	while (len > 0) {
249 		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
250 		count = min(m->m_len - off, len);
251 		bcopy(mtod(m, caddr_t) + off, cp, count);
252 		len -= count;
253 		cp += count;
254 		off = 0;
255 		m = m->m_next;
256 	}
257 }
258 
259 /*
260  * Copy a packet header mbuf chain into a completely new chain, including
261  * copying any mbuf clusters.  Use this instead of m_copypacket() when
262  * you need a writable copy of an mbuf chain.
263  */
264 struct mbuf *
265 m_dup(struct mbuf *m, int how)
266 {
267 	struct mbuf **p, *top = NULL;
268 	int remain, moff, nsize;
269 
270 	/* Sanity check */
271 	if (m == NULL)
272 		return (NULL);
273 	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__));
274 
275 	/* While there's more data, get a new mbuf, tack it on, and fill it */
276 	remain = m->m_pkthdr.len;
277 	moff = 0;
278 	p = &top;
279 	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
280 		struct mbuf *n;
281 
282 		/* Get the next new mbuf */
283 		MGET(n, how, m->m_type);
284 		if (n == NULL)
285 			goto nospace;
286 		if (top == NULL) {		/* first one, must be PKTHDR */
287 			M_COPY_PKTHDR(n, m);
288 			nsize = MHLEN;
289 		} else				/* not the first one */
290 			nsize = MLEN;
291 		if (remain >= MINCLSIZE) {
292 			MCLGET(n, how);
293 			if ((n->m_flags & M_EXT) == 0) {
294 				(void)m_free(n);
295 				goto nospace;
296 			}
297 			nsize = MCLBYTES;
298 		}
299 		n->m_len = 0;
300 
301 		/* Link it into the new chain */
302 		*p = n;
303 		p = &n->m_next;
304 
305 		/* Copy data from original mbuf(s) into new mbuf */
306 		while (n->m_len < nsize && m != NULL) {
307 			int chunk = min(nsize - n->m_len, m->m_len - moff);
308 
309 			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
310 			moff += chunk;
311 			n->m_len += chunk;
312 			remain -= chunk;
313 			if (moff == m->m_len) {
314 				m = m->m_next;
315 				moff = 0;
316 			}
317 		}
318 
319 		/* Check correct total mbuf length */
320 		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
321 		    	("%s: bogus m_pkthdr.len", __func__));
322 	}
323 	return (top);
324 
325 nospace:
326 	m_freem(top);
327 	mbstat.m_mcfail++;	/* XXX: No consistency. */
328 	return (NULL);
329 }
330 
331 /*
332  * Concatenate mbuf chain n to m.
333  * Both chains must be of the same type (e.g. MT_DATA).
334  * Any m_pkthdr is not updated.
335  */
336 void
337 m_cat(struct mbuf *m, struct mbuf *n)
338 {
339 	while (m->m_next)
340 		m = m->m_next;
341 	while (n) {
342 		if (m->m_flags & M_EXT ||
343 		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
344 			/* just join the two chains */
345 			m->m_next = n;
346 			return;
347 		}
348 		/* splat the data from one into the other */
349 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
350 		    (u_int)n->m_len);
351 		m->m_len += n->m_len;
352 		n = m_free(n);
353 	}
354 }
355 
356 void
357 m_adj(struct mbuf *mp, int req_len)
358 {
359 	int len = req_len;
360 	struct mbuf *m;
361 	int count;
362 
363 	if ((m = mp) == NULL)
364 		return;
365 	if (len >= 0) {
366 		/*
367 		 * Trim from head.
368 		 */
369 		while (m != NULL && len > 0) {
370 			if (m->m_len <= len) {
371 				len -= m->m_len;
372 				m->m_len = 0;
373 				m = m->m_next;
374 			} else {
375 				m->m_len -= len;
376 				m->m_data += len;
377 				len = 0;
378 			}
379 		}
380 		m = mp;
381 		if (mp->m_flags & M_PKTHDR)
382 			m->m_pkthdr.len -= (req_len - len);
383 	} else {
384 		/*
385 		 * Trim from tail.  Scan the mbuf chain,
386 		 * calculating its length and finding the last mbuf.
387 		 * If the adjustment only affects this mbuf, then just
388 		 * adjust and return.  Otherwise, rescan and truncate
389 		 * after the remaining size.
390 		 */
391 		len = -len;
392 		count = 0;
393 		for (;;) {
394 			count += m->m_len;
395 			if (m->m_next == (struct mbuf *)0)
396 				break;
397 			m = m->m_next;
398 		}
399 		if (m->m_len >= len) {
400 			m->m_len -= len;
401 			if (mp->m_flags & M_PKTHDR)
402 				mp->m_pkthdr.len -= len;
403 			return;
404 		}
405 		count -= len;
406 		if (count < 0)
407 			count = 0;
408 		/*
409 		 * Correct length for chain is "count".
410 		 * Find the mbuf with last data, adjust its length,
411 		 * and toss data from remaining mbufs on chain.
412 		 */
413 		m = mp;
414 		if (m->m_flags & M_PKTHDR)
415 			m->m_pkthdr.len = count;
416 		for (; m; m = m->m_next) {
417 			if (m->m_len >= count) {
418 				m->m_len = count;
419 				break;
420 			}
421 			count -= m->m_len;
422 		}
423 		while (m->m_next)
424 			(m = m->m_next) ->m_len = 0;
425 	}
426 }
427 
428 /*
429  * Rearange an mbuf chain so that len bytes are contiguous
430  * and in the data area of an mbuf (so that mtod and dtom
431  * will work for a structure of size len).  Returns the resulting
432  * mbuf chain on success, frees it and returns null on failure.
433  * If there is room, it will add up to max_protohdr-len extra bytes to the
434  * contiguous region in an attempt to avoid being called next time.
435  */
436 struct mbuf *
437 m_pullup(struct mbuf *n, int len)
438 {
439 	struct mbuf *m;
440 	int count;
441 	int space;
442 
443 	/*
444 	 * If first mbuf has no cluster, and has room for len bytes
445 	 * without shifting current data, pullup into it,
446 	 * otherwise allocate a new mbuf to prepend to the chain.
447 	 */
448 	if ((n->m_flags & M_EXT) == 0 &&
449 	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
450 		if (n->m_len >= len)
451 			return (n);
452 		m = n;
453 		n = n->m_next;
454 		len -= m->m_len;
455 	} else {
456 		if (len > MHLEN)
457 			goto bad;
458 		MGET(m, M_DONTWAIT, n->m_type);
459 		if (m == NULL)
460 			goto bad;
461 		m->m_len = 0;
462 		if (n->m_flags & M_PKTHDR) {
463 			M_COPY_PKTHDR(m, n);
464 			n->m_flags &= ~M_PKTHDR;
465 		}
466 	}
467 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
468 	do {
469 		count = min(min(max(len, max_protohdr), space), n->m_len);
470 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
471 		  (unsigned)count);
472 		len -= count;
473 		m->m_len += count;
474 		n->m_len -= count;
475 		space -= count;
476 		if (n->m_len)
477 			n->m_data += count;
478 		else
479 			n = m_free(n);
480 	} while (len > 0 && n);
481 	if (len > 0) {
482 		(void) m_free(m);
483 		goto bad;
484 	}
485 	m->m_next = n;
486 	return (m);
487 bad:
488 	m_freem(n);
489 	mbstat.m_mpfail++;	/* XXX: No consistency. */
490 	return (NULL);
491 }
492 
493 /*
494  * Partition an mbuf chain in two pieces, returning the tail --
495  * all but the first len0 bytes.  In case of failure, it returns NULL and
496  * attempts to restore the chain to its original state.
497  *
498  * Note that the resulting mbufs might be read-only, because the new
499  * mbuf can end up sharing an mbuf cluster with the original mbuf if
500  * the "breaking point" happens to lie within a cluster mbuf. Use the
501  * M_WRITABLE() macro to check for this case.
502  */
503 struct mbuf *
504 m_split(struct mbuf *m0, int len0, int wait)
505 {
506 	struct mbuf *m, *n;
507 	unsigned len = len0, remain;
508 
509 	for (m = m0; m && len > m->m_len; m = m->m_next)
510 		len -= m->m_len;
511 	if (m == NULL)
512 		return (NULL);
513 	remain = m->m_len - len;
514 	if (m0->m_flags & M_PKTHDR) {
515 		MGETHDR(n, wait, m0->m_type);
516 		if (n == NULL)
517 			return (NULL);
518 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
519 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
520 		m0->m_pkthdr.len = len0;
521 		if (m->m_flags & M_EXT)
522 			goto extpacket;
523 		if (remain > MHLEN) {
524 			/* m can't be the lead packet */
525 			MH_ALIGN(n, 0);
526 			n->m_next = m_split(m, len, wait);
527 			if (n->m_next == NULL) {
528 				(void) m_free(n);
529 				return (NULL);
530 			} else {
531 				n->m_len = 0;
532 				return (n);
533 			}
534 		} else
535 			MH_ALIGN(n, remain);
536 	} else if (remain == 0) {
537 		n = m->m_next;
538 		m->m_next = NULL;
539 		return (n);
540 	} else {
541 		MGET(n, wait, m->m_type);
542 		if (n == NULL)
543 			return (NULL);
544 		M_ALIGN(n, remain);
545 	}
546 extpacket:
547 	if (m->m_flags & M_EXT) {
548 		n->m_flags |= M_EXT;
549 		n->m_ext = m->m_ext;
550 		MEXT_ADD_REF(m);
551 		n->m_data = m->m_data + len;
552 	} else {
553 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
554 	}
555 	n->m_len = remain;
556 	m->m_len = len;
557 	n->m_next = m->m_next;
558 	m->m_next = NULL;
559 	return (n);
560 }
561 /*
562  * Routine to copy from device local memory into mbufs.
563  * Note that `off' argument is offset into first mbuf of target chain from
564  * which to begin copying the data to.
565  */
566 struct mbuf *
567 m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
568 	 void (*copy)(char *from, caddr_t to, u_int len))
569 {
570 	struct mbuf *m;
571 	struct mbuf *top = 0, **mp = &top;
572 	int len;
573 
574 	if (off < 0 || off > MHLEN)
575 		return (NULL);
576 
577 	MGETHDR(m, M_DONTWAIT, MT_DATA);
578 	if (m == NULL)
579 		return (NULL);
580 	m->m_pkthdr.rcvif = ifp;
581 	m->m_pkthdr.len = totlen;
582 	len = MHLEN;
583 
584 	while (totlen > 0) {
585 		if (top) {
586 			MGET(m, M_DONTWAIT, MT_DATA);
587 			if (m == NULL) {
588 				m_freem(top);
589 				return (NULL);
590 			}
591 			len = MLEN;
592 		}
593 		if (totlen + off >= MINCLSIZE) {
594 			MCLGET(m, M_DONTWAIT);
595 			if (m->m_flags & M_EXT)
596 				len = MCLBYTES;
597 		} else {
598 			/*
599 			 * Place initial small packet/header at end of mbuf.
600 			 */
601 			if (top == NULL && totlen + off + max_linkhdr <= len) {
602 				m->m_data += max_linkhdr;
603 				len -= max_linkhdr;
604 			}
605 		}
606 		if (off) {
607 			m->m_data += off;
608 			len -= off;
609 			off = 0;
610 		}
611 		m->m_len = len = min(totlen, len);
612 		if (copy)
613 			copy(buf, mtod(m, caddr_t), (unsigned)len);
614 		else
615 			bcopy(buf, mtod(m, caddr_t), (unsigned)len);
616 		buf += len;
617 		*mp = m;
618 		mp = &m->m_next;
619 		totlen -= len;
620 	}
621 	return (top);
622 }
623 
624 /*
625  * Copy data from a buffer back into the indicated mbuf chain,
626  * starting "off" bytes from the beginning, extending the mbuf
627  * chain if necessary.
628  */
629 void
630 m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
631 {
632 	int mlen;
633 	struct mbuf *m = m0, *n;
634 	int totlen = 0;
635 
636 	if (m0 == NULL)
637 		return;
638 	while (off > (mlen = m->m_len)) {
639 		off -= mlen;
640 		totlen += mlen;
641 		if (m->m_next == NULL) {
642 			n = m_get_clrd(M_DONTWAIT, m->m_type);
643 			if (n == NULL)
644 				goto out;
645 			n->m_len = min(MLEN, len + off);
646 			m->m_next = n;
647 		}
648 		m = m->m_next;
649 	}
650 	while (len > 0) {
651 		mlen = min (m->m_len - off, len);
652 		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
653 		cp += mlen;
654 		len -= mlen;
655 		mlen += off;
656 		off = 0;
657 		totlen += mlen;
658 		if (len == 0)
659 			break;
660 		if (m->m_next == NULL) {
661 			n = m_get(M_DONTWAIT, m->m_type);
662 			if (n == NULL)
663 				break;
664 			n->m_len = min(MLEN, len);
665 			m->m_next = n;
666 		}
667 		m = m->m_next;
668 	}
669 out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
670 		m->m_pkthdr.len = totlen;
671 }
672 
673 void
674 m_print(const struct mbuf *m)
675 {
676 	int len;
677 	const struct mbuf *m2;
678 
679 	len = m->m_pkthdr.len;
680 	m2 = m;
681 	while (len) {
682 		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
683 		len -= m2->m_len;
684 		m2 = m2->m_next;
685 	}
686 	return;
687 }
688