xref: /freebsd/sys/netipsec/ipsec_mbuf.c (revision a35d88931c87cfe6bd38f01d7bad22140b3b38f3)
1 /*-
2  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*
30  * IPsec-specific mbuf routines.
31  */
32 
33 #include "opt_param.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/mbuf.h>
38 #include <sys/socket.h>
39 
40 #include <net/route.h>
41 #include <netinet/in.h>
42 
43 #include <netipsec/ipsec.h>
44 
45 /*
46  * Create a writable copy of the mbuf chain.  While doing this
47  * we compact the chain with a goal of producing a chain with
48  * at most two mbufs.  The second mbuf in this chain is likely
49  * to be a cluster.  The primary purpose of this work is to create
50  * a writable packet for encryption, compression, etc.  The
51  * secondary goal is to linearize the data so the data can be
52  * passed to crypto hardware in the most efficient manner possible.
53  */
54 struct mbuf *
55 m_clone(struct mbuf *m0)
56 {
57 	struct mbuf *m, *mprev;
58 	struct mbuf *n, *mfirst, *mlast;
59 	int len, off;
60 
61 	IPSEC_ASSERT(m0 != NULL, ("null mbuf"));
62 
63 	mprev = NULL;
64 	for (m = m0; m != NULL; m = mprev->m_next) {
65 		/*
66 		 * Regular mbufs are ignored unless there's a cluster
67 		 * in front of it that we can use to coalesce.  We do
68 		 * the latter mainly so later clusters can be coalesced
69 		 * also w/o having to handle them specially (i.e. convert
70 		 * mbuf+cluster -> cluster).  This optimization is heavily
71 		 * influenced by the assumption that we're running over
72 		 * Ethernet where MCLBYTES is large enough that the max
73 		 * packet size will permit lots of coalescing into a
74 		 * single cluster.  This in turn permits efficient
75 		 * crypto operations, especially when using hardware.
76 		 */
77 		if ((m->m_flags & M_EXT) == 0) {
78 			if (mprev && (mprev->m_flags & M_EXT) &&
79 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
80 				/* XXX: this ignores mbuf types */
81 				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
82 				       mtod(m, caddr_t), m->m_len);
83 				mprev->m_len += m->m_len;
84 				mprev->m_next = m->m_next;	/* unlink from chain */
85 				m_free(m);			/* reclaim mbuf */
86 				newipsecstat.ips_mbcoalesced++;
87 			} else {
88 				mprev = m;
89 			}
90 			continue;
91 		}
92 		/*
93 		 * Writable mbufs are left alone (for now).
94 		 */
95 		if (!MEXT_IS_REF(m)) {
96 			mprev = m;
97 			continue;
98 		}
99 
100 		/*
101 		 * Not writable, replace with a copy or coalesce with
102 		 * the previous mbuf if possible (since we have to copy
103 		 * it anyway, we try to reduce the number of mbufs and
104 		 * clusters so that future work is easier).
105 		 */
106 		IPSEC_ASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
107 		/* NB: we only coalesce into a cluster or larger */
108 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
109 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
110 			/* XXX: this ignores mbuf types */
111 			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
112 			       mtod(m, caddr_t), m->m_len);
113 			mprev->m_len += m->m_len;
114 			mprev->m_next = m->m_next;	/* unlink from chain */
115 			m_free(m);			/* reclaim mbuf */
116 			newipsecstat.ips_clcoalesced++;
117 			continue;
118 		}
119 
120 		/*
121 		 * Allocate new space to hold the copy...
122 		 */
123 		/* XXX why can M_PKTHDR be set past the first mbuf? */
124 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
125 			/*
126 			 * NB: if a packet header is present we must
127 			 * allocate the mbuf separately from any cluster
128 			 * because M_MOVE_PKTHDR will smash the data
129 			 * pointer and drop the M_EXT marker.
130 			 */
131 			MGETHDR(n, M_DONTWAIT, m->m_type);
132 			if (n == NULL) {
133 				m_freem(m0);
134 				return (NULL);
135 			}
136 			M_MOVE_PKTHDR(n, m);
137 			MCLGET(n, M_DONTWAIT);
138 			if ((n->m_flags & M_EXT) == 0) {
139 				m_free(n);
140 				m_freem(m0);
141 				return (NULL);
142 			}
143 		} else {
144 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
145 			if (n == NULL) {
146 				m_freem(m0);
147 				return (NULL);
148 			}
149 		}
150 		/*
151 		 * ... and copy the data.  We deal with jumbo mbufs
152 		 * (i.e. m_len > MCLBYTES) by splitting them into
153 		 * clusters.  We could just malloc a buffer and make
154 		 * it external but too many device drivers don't know
155 		 * how to break up the non-contiguous memory when
156 		 * doing DMA.
157 		 */
158 		len = m->m_len;
159 		off = 0;
160 		mfirst = n;
161 		mlast = NULL;
162 		for (;;) {
163 			int cc = min(len, MCLBYTES);
164 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
165 			n->m_len = cc;
166 			if (mlast != NULL)
167 				mlast->m_next = n;
168 			mlast = n;
169 			newipsecstat.ips_clcopied++;
170 
171 			len -= cc;
172 			if (len <= 0)
173 				break;
174 			off += cc;
175 
176 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
177 			if (n == NULL) {
178 				m_freem(mfirst);
179 				m_freem(m0);
180 				return (NULL);
181 			}
182 		}
183 		n->m_next = m->m_next;
184 		if (mprev == NULL)
185 			m0 = mfirst;		/* new head of chain */
186 		else
187 			mprev->m_next = mfirst;	/* replace old mbuf */
188 		m_free(m);			/* release old mbuf */
189 		mprev = mfirst;
190 	}
191 	return (m0);
192 }
193 
194 /*
195  * Make space for a new header of length hlen at skip bytes
196  * into the packet.  When doing this we allocate new mbufs only
197  * when absolutely necessary.  The mbuf where the new header
198  * is to go is returned together with an offset into the mbuf.
199  * If NULL is returned then the mbuf chain may have been modified;
200  * the caller is assumed to always free the chain.
201  */
202 struct mbuf *
203 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
204 {
205 	struct mbuf *m;
206 	unsigned remain;
207 
208 	IPSEC_ASSERT(m0 != NULL, ("null mbuf"));
209 	IPSEC_ASSERT(hlen < MHLEN, ("hlen too big: %u", hlen));
210 
211 	for (m = m0; m && skip > m->m_len; m = m->m_next)
212 		skip -= m->m_len;
213 	if (m == NULL)
214 		return (NULL);
215 	/*
216 	 * At this point skip is the offset into the mbuf m
217 	 * where the new header should be placed.  Figure out
218 	 * if there's space to insert the new header.  If so,
219 	 * and copying the remainder makese sense then do so.
220 	 * Otherwise insert a new mbuf in the chain, splitting
221 	 * the contents of m as needed.
222 	 */
223 	remain = m->m_len - skip;		/* data to move */
224 	if (hlen > M_TRAILINGSPACE(m)) {
225 		struct mbuf *n;
226 
227 		/* XXX code doesn't handle clusters XXX */
228 		IPSEC_ASSERT(remain < MLEN, ("remainder too big: %u", remain));
229 		/*
230 		 * Not enough space in m, split the contents
231 		 * of m, inserting new mbufs as required.
232 		 *
233 		 * NB: this ignores mbuf types.
234 		 */
235 		MGET(n, M_DONTWAIT, MT_DATA);
236 		if (n == NULL)
237 			return (NULL);
238 		n->m_next = m->m_next;		/* splice new mbuf */
239 		m->m_next = n;
240 		newipsecstat.ips_mbinserted++;
241 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
242 			/*
243 			 * New header fits in the old mbuf if we copy
244 			 * the remainder; just do the copy to the new
245 			 * mbuf and we're good to go.
246 			 */
247 			memcpy(mtod(n, caddr_t),
248 			       mtod(m, caddr_t) + skip, remain);
249 			n->m_len = remain;
250 			m->m_len = skip + hlen;
251 			*off = skip;
252 		} else {
253 			/*
254 			 * No space in the old mbuf for the new header.
255 			 * Make space in the new mbuf and check the
256 			 * remainder'd data fits too.  If not then we
257 			 * must allocate an additional mbuf (yech).
258 			 */
259 			n->m_len = 0;
260 			if (remain + hlen > M_TRAILINGSPACE(n)) {
261 				struct mbuf *n2;
262 
263 				MGET(n2, M_DONTWAIT, MT_DATA);
264 				/* NB: new mbuf is on chain, let caller free */
265 				if (n2 == NULL)
266 					return (NULL);
267 				n2->m_len = 0;
268 				memcpy(mtod(n2, caddr_t),
269 				       mtod(m, caddr_t) + skip, remain);
270 				n2->m_len = remain;
271 				/* splice in second mbuf */
272 				n2->m_next = n->m_next;
273 				n->m_next = n2;
274 				newipsecstat.ips_mbinserted++;
275 			} else {
276 				memcpy(mtod(n, caddr_t) + hlen,
277 				       mtod(m, caddr_t) + skip, remain);
278 				n->m_len += remain;
279 			}
280 			m->m_len -= remain;
281 			n->m_len += hlen;
282 			m = n;			/* header is at front ... */
283 			*off = 0;		/* ... of new mbuf */
284 		}
285 	} else {
286 		/*
287 		 * Copy the remainder to the back of the mbuf
288 		 * so there's space to write the new header.
289 		 */
290 		bcopy(mtod(m, caddr_t) + skip,
291 		      mtod(m, caddr_t) + skip + hlen, remain);
292 		m->m_len += hlen;
293 		*off = skip;
294 	}
295 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
296 	return m;
297 }
298 
299 /*
300  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
301  * length is updated, and a pointer to the first byte of the padding
302  * (which is guaranteed to be all in one mbuf) is returned.
303  */
304 caddr_t
305 m_pad(struct mbuf *m, int n)
306 {
307 	register struct mbuf *m0, *m1;
308 	register int len, pad;
309 	caddr_t retval;
310 
311 	if (n <= 0) {  /* No stupid arguments. */
312 		DPRINTF(("%s: pad length invalid (%d)\n", __func__, n));
313 		m_freem(m);
314 		return NULL;
315 	}
316 
317 	len = m->m_pkthdr.len;
318 	pad = n;
319 	m0 = m;
320 
321 	while (m0->m_len < len) {
322 		len -= m0->m_len;
323 		m0 = m0->m_next;
324 	}
325 
326 	if (m0->m_len != len) {
327 		DPRINTF(("%s: length mismatch (should be %d instead of %d)\n",
328 			__func__, m->m_pkthdr.len,
329 			m->m_pkthdr.len + m0->m_len - len));
330 
331 		m_freem(m);
332 		return NULL;
333 	}
334 
335 	/* Check for zero-length trailing mbufs, and find the last one. */
336 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
337 		if (m1->m_next->m_len != 0) {
338 			DPRINTF(("%s: length mismatch (should be %d instead "
339 				"of %d)\n", __func__,
340 				m->m_pkthdr.len,
341 				m->m_pkthdr.len + m1->m_next->m_len));
342 
343 			m_freem(m);
344 			return NULL;
345 		}
346 
347 		m0 = m1->m_next;
348 	}
349 
350 	if (pad > M_TRAILINGSPACE(m0)) {
351 		/* Add an mbuf to the chain. */
352 		MGET(m1, M_DONTWAIT, MT_DATA);
353 		if (m1 == 0) {
354 			m_freem(m0);
355 			DPRINTF(("%s: unable to get extra mbuf\n", __func__));
356 			return NULL;
357 		}
358 
359 		m0->m_next = m1;
360 		m0 = m1;
361 		m0->m_len = 0;
362 	}
363 
364 	retval = m0->m_data + m0->m_len;
365 	m0->m_len += pad;
366 	m->m_pkthdr.len += pad;
367 
368 	return retval;
369 }
370 
371 /*
372  * Remove hlen data at offset skip in the packet.  This is used by
373  * the protocols strip protocol headers and associated data (e.g. IV,
374  * authenticator) on input.
375  */
376 int
377 m_striphdr(struct mbuf *m, int skip, int hlen)
378 {
379 	struct mbuf *m1;
380 	int roff;
381 
382 	/* Find beginning of header */
383 	m1 = m_getptr(m, skip, &roff);
384 	if (m1 == NULL)
385 		return (EINVAL);
386 
387 	/* Remove the header and associated data from the mbuf. */
388 	if (roff == 0) {
389 		/* The header was at the beginning of the mbuf */
390 		newipsecstat.ips_input_front++;
391 		m_adj(m1, hlen);
392 		if ((m1->m_flags & M_PKTHDR) == 0)
393 			m->m_pkthdr.len -= hlen;
394 	} else if (roff + hlen >= m1->m_len) {
395 		struct mbuf *mo;
396 
397 		/*
398 		 * Part or all of the header is at the end of this mbuf,
399 		 * so first let's remove the remainder of the header from
400 		 * the beginning of the remainder of the mbuf chain, if any.
401 		 */
402 		newipsecstat.ips_input_end++;
403 		if (roff + hlen > m1->m_len) {
404 			/* Adjust the next mbuf by the remainder */
405 			m_adj(m1->m_next, roff + hlen - m1->m_len);
406 
407 			/* The second mbuf is guaranteed not to have a pkthdr... */
408 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
409 		}
410 
411 		/* Now, let's unlink the mbuf chain for a second...*/
412 		mo = m1->m_next;
413 		m1->m_next = NULL;
414 
415 		/* ...and trim the end of the first part of the chain...sick */
416 		m_adj(m1, -(m1->m_len - roff));
417 		if ((m1->m_flags & M_PKTHDR) == 0)
418 			m->m_pkthdr.len -= (m1->m_len - roff);
419 
420 		/* Finally, let's relink */
421 		m1->m_next = mo;
422 	} else {
423 		/*
424 		 * The header lies in the "middle" of the mbuf; copy
425 		 * the remainder of the mbuf down over the header.
426 		 */
427 		newipsecstat.ips_input_middle++;
428 		bcopy(mtod(m1, u_char *) + roff + hlen,
429 		      mtod(m1, u_char *) + roff,
430 		      m1->m_len - (roff + hlen));
431 		m1->m_len -= hlen;
432 		m->m_pkthdr.len -= hlen;
433 	}
434 	return (0);
435 }
436 
437 /*
438  * Diagnostic routine to check mbuf alignment as required by the
439  * crypto device drivers (that use DMA).
440  */
441 void
442 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
443 {
444 	int roff;
445 	struct mbuf *m = m_getptr(m0, off, &roff);
446 	caddr_t addr;
447 
448 	if (m == NULL)
449 		return;
450 	printf("%s (off %u len %u): ", where, off, len);
451 	addr = mtod(m, caddr_t) + roff;
452 	do {
453 		int mlen;
454 
455 		if (((uintptr_t) addr) & 3) {
456 			printf("addr misaligned %p,", addr);
457 			break;
458 		}
459 		mlen = m->m_len;
460 		if (mlen > len)
461 			mlen = len;
462 		len -= mlen;
463 		if (len && (mlen & 3)) {
464 			printf("len mismatch %u,", mlen);
465 			break;
466 		}
467 		m = m->m_next;
468 		addr = m ? mtod(m, caddr_t) : NULL;
469 	} while (m && len > 0);
470 	for (m = m0; m; m = m->m_next)
471 		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
472 	printf("\n");
473 }
474