xref: /freebsd/sys/kern/uipc_mbuf2.c (revision 4f29da19bd44f0e99f021510460a81bf754c21d2)
1 /*	$KAME: uipc_mbuf2.c,v 1.31 2001/11/28 11:08:53 itojun Exp $	*/
2 /*	$NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $	*/
3 
4 /*-
5  * Copyright (C) 1999 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 /*-
33  * Copyright (c) 1982, 1986, 1988, 1991, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 /*#define PULLDOWN_DEBUG*/
67 
68 #include "opt_mac.h"
69 
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/kernel.h>
73 #include <sys/lock.h>
74 #include <sys/mac.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/mutex.h>
78 
79 static MALLOC_DEFINE(M_PACKET_TAGS, MBUF_TAG_MEM_NAME,
80     "packet-attached information");
81 
82 /* can't call it m_dup(), as freebsd[34] uses m_dup() with different arg */
83 static struct mbuf *m_dup1(struct mbuf *, int, int, int);
84 
85 /*
86  * ensure that [off, off + len) is contiguous on the mbuf chain "m".
87  * packet chain before "off" is kept untouched.
88  * if offp == NULL, the target will start at <retval, 0> on resulting chain.
89  * if offp != NULL, the target will start at <retval, *offp> on resulting chain.
90  *
91  * on error return (NULL return value), original "m" will be freed.
92  *
93  * XXX: M_TRAILINGSPACE/M_LEADINGSPACE only permitted on writable ext_buf.
94  */
95 struct mbuf *
96 m_pulldown(struct mbuf *m, int off, int len, int *offp)
97 {
98 	struct mbuf *n, *o;
99 	int hlen, tlen, olen;
100 	int writable;
101 
102 	/* check invalid arguments. */
103 	if (m == NULL)
104 		panic("m == NULL in m_pulldown()");
105 	if (len > MCLBYTES) {
106 		m_freem(m);
107 		return NULL;	/* impossible */
108 	}
109 
110 #ifdef PULLDOWN_DEBUG
111     {
112 	struct mbuf *t;
113 	printf("before:");
114 	for (t = m; t; t = t->m_next)
115 		printf(" %d", t->m_len);
116 	printf("\n");
117     }
118 #endif
119 	n = m;
120 	while (n != NULL && off > 0) {
121 		if (n->m_len > off)
122 			break;
123 		off -= n->m_len;
124 		n = n->m_next;
125 	}
126 	/* be sure to point non-empty mbuf */
127 	while (n != NULL && n->m_len == 0)
128 		n = n->m_next;
129 	if (!n) {
130 		m_freem(m);
131 		return NULL;	/* mbuf chain too short */
132 	}
133 
134 	/*
135 	 * XXX: This code is flawed because it considers a "writable" mbuf
136 	 *      data region to require all of the following:
137 	 *	  (i) mbuf _has_ to have M_EXT set; if it is just a regular
138 	 *	      mbuf, it is still not considered "writable."
139 	 *	  (ii) since mbuf has M_EXT, the ext_type _has_ to be
140 	 *	       EXT_CLUSTER. Anything else makes it non-writable.
141 	 *	  (iii) M_WRITABLE() must evaluate true.
142 	 *      Ideally, the requirement should only be (iii).
143 	 *
144 	 * If we're writable, we're sure we're writable, because the ref. count
145 	 * cannot increase from 1, as that would require posession of mbuf
146 	 * n by someone else (which is impossible). However, if we're _not_
147 	 * writable, we may eventually become writable )if the ref. count drops
148 	 * to 1), but we'll fail to notice it unless we re-evaluate
149 	 * M_WRITABLE(). For now, we only evaluate once at the beginning and
150 	 * live with this.
151 	 */
152 	/*
153 	 * XXX: This is dumb. If we're just a regular mbuf with no M_EXT,
154 	 *      then we're not "writable," according to this code.
155 	 */
156 	writable = 0;
157 	if ((n->m_flags & M_EXT) == 0 ||
158 	    (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n)))
159 		writable = 1;
160 
161 	/*
162 	 * the target data is on <n, off>.
163 	 * if we got enough data on the mbuf "n", we're done.
164 	 */
165 	if ((off == 0 || offp) && len <= n->m_len - off && writable)
166 		goto ok;
167 
168 	/*
169 	 * when len <= n->m_len - off and off != 0, it is a special case.
170 	 * len bytes from <n, off> sits in single mbuf, but the caller does
171 	 * not like the starting position (off).
172 	 * chop the current mbuf into two pieces, set off to 0.
173 	 */
174 	if (len <= n->m_len - off) {
175 		o = m_dup1(n, off, n->m_len - off, M_DONTWAIT);
176 		if (o == NULL) {
177 			m_freem(m);
178 			return NULL;	/* ENOBUFS */
179 		}
180 		n->m_len = off;
181 		o->m_next = n->m_next;
182 		n->m_next = o;
183 		n = n->m_next;
184 		off = 0;
185 		goto ok;
186 	}
187 
188 	/*
189 	 * we need to take hlen from <n, off> and tlen from <n->m_next, 0>,
190 	 * and construct contiguous mbuf with m_len == len.
191 	 * note that hlen + tlen == len, and tlen > 0.
192 	 */
193 	hlen = n->m_len - off;
194 	tlen = len - hlen;
195 
196 	/*
197 	 * ensure that we have enough trailing data on mbuf chain.
198 	 * if not, we can do nothing about the chain.
199 	 */
200 	olen = 0;
201 	for (o = n->m_next; o != NULL; o = o->m_next)
202 		olen += o->m_len;
203 	if (hlen + olen < len) {
204 		m_freem(m);
205 		return NULL;	/* mbuf chain too short */
206 	}
207 
208 	/*
209 	 * easy cases first.
210 	 * we need to use m_copydata() to get data from <n->m_next, 0>.
211 	 */
212 	if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen
213 	 && writable) {
214 		m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len);
215 		n->m_len += tlen;
216 		m_adj(n->m_next, tlen);
217 		goto ok;
218 	}
219 	if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen
220 	 && writable) {
221 		n->m_next->m_data -= hlen;
222 		n->m_next->m_len += hlen;
223 		bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen);
224 		n->m_len -= hlen;
225 		n = n->m_next;
226 		off = 0;
227 		goto ok;
228 	}
229 
230 	/*
231 	 * now, we need to do the hard way.  don't m_copy as there's no room
232 	 * on both end.
233 	 */
234 	if (len > MLEN)
235 		o = m_getcl(M_DONTWAIT, m->m_type, 0);
236 	else
237 		o = m_get(M_DONTWAIT, m->m_type);
238 	if (!o) {
239 		m_freem(m);
240 		return NULL;	/* ENOBUFS */
241 	}
242 	/* get hlen from <n, off> into <o, 0> */
243 	o->m_len = hlen;
244 	bcopy(mtod(n, caddr_t) + off, mtod(o, caddr_t), hlen);
245 	n->m_len -= hlen;
246 	/* get tlen from <n->m_next, 0> into <o, hlen> */
247 	m_copydata(n->m_next, 0, tlen, mtod(o, caddr_t) + o->m_len);
248 	o->m_len += tlen;
249 	m_adj(n->m_next, tlen);
250 	o->m_next = n->m_next;
251 	n->m_next = o;
252 	n = o;
253 	off = 0;
254 
255 ok:
256 #ifdef PULLDOWN_DEBUG
257     {
258 	struct mbuf *t;
259 	printf("after:");
260 	for (t = m; t; t = t->m_next)
261 		printf("%c%d", t == n ? '*' : ' ', t->m_len);
262 	printf(" (off=%d)\n", off);
263     }
264 #endif
265 	if (offp)
266 		*offp = off;
267 	return n;
268 }
269 
270 static struct mbuf *
271 m_dup1(struct mbuf *m, int off, int len, int wait)
272 {
273 	struct mbuf *n;
274 	int copyhdr;
275 
276 	if (len > MCLBYTES)
277 		return NULL;
278 	if (off == 0 && (m->m_flags & M_PKTHDR) != 0)
279 		copyhdr = 1;
280 	else
281 		copyhdr = 0;
282 	if (len >= MINCLSIZE) {
283 		if (copyhdr == 1)
284 			n = m_getcl(wait, m->m_type, M_PKTHDR);
285 		else
286 			n = m_getcl(wait, m->m_type, 0);
287 	} else {
288 		if (copyhdr == 1)
289 			n = m_gethdr(wait, m->m_type);
290 		else
291 			n = m_get(wait, m->m_type);
292 	}
293 	if (!n)
294 		return NULL; /* ENOBUFS */
295 
296 	if (copyhdr && !m_dup_pkthdr(n, m, wait)) {
297 		m_free(n);
298 		return NULL;
299 	}
300 	m_copydata(m, off, len, mtod(n, caddr_t));
301 	n->m_len = len;
302 	return n;
303 }
304 
305 /* Free a packet tag. */
306 void
307 m_tag_free_default(struct m_tag *t)
308 {
309 #ifdef MAC
310 	if (t->m_tag_id == PACKET_TAG_MACLABEL)
311 		mac_destroy_mbuf_tag(t);
312 #endif
313 	free(t, M_PACKET_TAGS);
314 }
315 
316 /* Get a packet tag structure along with specified data following. */
317 struct m_tag *
318 m_tag_alloc(u_int32_t cookie, int type, int len, int wait)
319 {
320 	struct m_tag *t;
321 
322 	MBUF_CHECKSLEEP(wait);
323 	if (len < 0)
324 		return NULL;
325 	t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait);
326 	if (t == NULL)
327 		return NULL;
328 	m_tag_setup(t, cookie, type, len);
329 	t->m_tag_free = m_tag_free_default;
330 	return t;
331 }
332 
/*
 * Remove tag "t" from mbuf "m" with m_tag_unlink() and then dispose of
 * it with m_tag_free().  Both arguments must be non-NULL.
 */
void
m_tag_delete(struct mbuf *m, struct m_tag *t)
{

	KASSERT(m != NULL && t != NULL,
	    ("m_tag_delete: null argument, m %p t %p", m, t));
	m_tag_unlink(m, t);
	m_tag_free(t);
}
342 
343 /* Unlink and free a packet tag chain, starting from given tag. */
344 void
345 m_tag_delete_chain(struct mbuf *m, struct m_tag *t)
346 {
347 	struct m_tag *p, *q;
348 
349 	KASSERT(m, ("m_tag_delete_chain: null mbuf"));
350 	if (t != NULL)
351 		p = t;
352 	else
353 		p = SLIST_FIRST(&m->m_pkthdr.tags);
354 	if (p == NULL)
355 		return;
356 	while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
357 		m_tag_delete(m, q);
358 	m_tag_delete(m, p);
359 }
360 
361 /*
362  * Strip off all tags that would normally vanish when
363  * passing through a network interface.  Only persistent
364  * tags will exist after this; these are expected to remain
365  * so long as the mbuf chain exists, regardless of the
366  * path the mbufs take.
367  */
368 void
369 m_tag_delete_nonpersistent(struct mbuf *m)
370 {
371 	struct m_tag *p, *q;
372 
373 	SLIST_FOREACH_SAFE(p, &m->m_pkthdr.tags, m_tag_link, q)
374 		if ((p->m_tag_id & MTAG_PERSISTENT) == 0)
375 			m_tag_delete(m, p);
376 }
377 
378 /* Find a tag, starting from a given position. */
379 struct m_tag *
380 m_tag_locate(struct mbuf *m, u_int32_t cookie, int type, struct m_tag *t)
381 {
382 	struct m_tag *p;
383 
384 	KASSERT(m, ("m_tag_locate: null mbuf"));
385 	if (t == NULL)
386 		p = SLIST_FIRST(&m->m_pkthdr.tags);
387 	else
388 		p = SLIST_NEXT(t, m_tag_link);
389 	while (p != NULL) {
390 		if (p->m_tag_cookie == cookie && p->m_tag_id == type)
391 			return p;
392 		p = SLIST_NEXT(p, m_tag_link);
393 	}
394 	return NULL;
395 }
396 
397 /* Copy a single tag. */
398 struct m_tag *
399 m_tag_copy(struct m_tag *t, int how)
400 {
401 	struct m_tag *p;
402 
403 	MBUF_CHECKSLEEP(how);
404 	KASSERT(t, ("m_tag_copy: null tag"));
405 	p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, how);
406 	if (p == NULL)
407 		return (NULL);
408 #ifdef MAC
409 	/*
410 	 * XXXMAC: we should probably pass off the initialization, and
411 	 * copying here?  can we hide that PACKET_TAG_MACLABEL is
412 	 * special from the mbuf code?
413 	 */
414 	if (t->m_tag_id == PACKET_TAG_MACLABEL) {
415 		if (mac_init_mbuf_tag(p, how) != 0) {
416 			m_tag_free(p);
417 			return (NULL);
418 		}
419 		mac_copy_mbuf_tag(t, p);
420 	} else
421 #endif
422 		bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */
423 	return p;
424 }
425 
426 /*
427  * Copy two tag chains. The destination mbuf (to) loses any attached
428  * tags even if the operation fails. This should not be a problem, as
429  * m_tag_copy_chain() is typically called with a newly-allocated
430  * destination mbuf.
431  */
432 int
433 m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how)
434 {
435 	struct m_tag *p, *t, *tprev = NULL;
436 
437 	MBUF_CHECKSLEEP(how);
438 	KASSERT(to && from,
439 		("m_tag_copy_chain: null argument, to %p from %p", to, from));
440 	m_tag_delete_chain(to, NULL);
441 	SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) {
442 		t = m_tag_copy(p, how);
443 		if (t == NULL) {
444 			m_tag_delete_chain(to, NULL);
445 			return 0;
446 		}
447 		if (tprev == NULL)
448 			SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link);
449 		else
450 			SLIST_INSERT_AFTER(tprev, t, m_tag_link);
451 		tprev = t;
452 	}
453 	return 1;
454 }
455