xref: /freebsd/sys/netipsec/ipsec_mbuf.c (revision 729362425c09cf6b362366aabc6fb547eee8035a)
1 /*-
2  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*
30  * IPsec-specific mbuf routines.
31  */
32 
33 #include "opt_param.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/mbuf.h>
38 #include <sys/socket.h>
39 
40 #include <net/route.h>
41 #include <netinet/in.h>
42 
43 #include <netipsec/ipsec.h>
44 
45 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
46 
47 /*
48  * Create a writable copy of the mbuf chain.  While doing this
49  * we compact the chain with a goal of producing a chain with
50  * at most two mbufs.  The second mbuf in this chain is likely
51  * to be a cluster.  The primary purpose of this work is to create
52  * a writable packet for encryption, compression, etc.  The
53  * secondary goal is to linearize the data so the data can be
54  * passed to crypto hardware in the most efficient manner possible.
55  */
56 struct mbuf *
57 m_clone(struct mbuf *m0)
58 {
59 	struct mbuf *m, *mprev;
60 	struct mbuf *n, *mfirst, *mlast;
61 	int len, off;
62 
63 	KASSERT(m0 != NULL, ("m_clone: null mbuf"));
64 
65 	mprev = NULL;
66 	for (m = m0; m != NULL; m = mprev->m_next) {
67 		/*
68 		 * Regular mbufs are ignored unless there's a cluster
69 		 * in front of it that we can use to coalesce.  We do
70 		 * the latter mainly so later clusters can be coalesced
71 		 * also w/o having to handle them specially (i.e. convert
72 		 * mbuf+cluster -> cluster).  This optimization is heavily
73 		 * influenced by the assumption that we're running over
74 		 * Ethernet where MCLBYTES is large enough that the max
75 		 * packet size will permit lots of coalescing into a
76 		 * single cluster.  This in turn permits efficient
77 		 * crypto operations, especially when using hardware.
78 		 */
79 		if ((m->m_flags & M_EXT) == 0) {
80 			if (mprev && (mprev->m_flags & M_EXT) &&
81 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
82 				/* XXX: this ignores mbuf types */
83 				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
84 				       mtod(m, caddr_t), m->m_len);
85 				mprev->m_len += m->m_len;
86 				mprev->m_next = m->m_next;	/* unlink from chain */
87 				m_free(m);			/* reclaim mbuf */
88 				newipsecstat.ips_mbcoalesced++;
89 			} else {
90 				mprev = m;
91 			}
92 			continue;
93 		}
94 		/*
95 		 * Writable mbufs are left alone (for now).
96 		 */
97 		if (!MEXT_IS_REF(m)) {
98 			mprev = m;
99 			continue;
100 		}
101 
102 		/*
103 		 * Not writable, replace with a copy or coalesce with
104 		 * the previous mbuf if possible (since we have to copy
105 		 * it anyway, we try to reduce the number of mbufs and
106 		 * clusters so that future work is easier).
107 		 */
108 		KASSERT(m->m_flags & M_EXT,
109 			("m_clone: m_flags 0x%x", m->m_flags));
110 		/* NB: we only coalesce into a cluster or larger */
111 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
112 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
113 			/* XXX: this ignores mbuf types */
114 			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
115 			       mtod(m, caddr_t), m->m_len);
116 			mprev->m_len += m->m_len;
117 			mprev->m_next = m->m_next;	/* unlink from chain */
118 			m_free(m);			/* reclaim mbuf */
119 			newipsecstat.ips_clcoalesced++;
120 			continue;
121 		}
122 
123 		/*
124 		 * Allocate new space to hold the copy...
125 		 */
126 		/* XXX why can M_PKTHDR be set past the first mbuf? */
127 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
128 			/*
129 			 * NB: if a packet header is present we must
130 			 * allocate the mbuf separately from any cluster
131 			 * because M_MOVE_PKTHDR will smash the data
132 			 * pointer and drop the M_EXT marker.
133 			 */
134 			MGETHDR(n, M_DONTWAIT, m->m_type);
135 			if (n == NULL) {
136 				m_freem(m0);
137 				return (NULL);
138 			}
139 			M_MOVE_PKTHDR(n, m);
140 			MCLGET(n, M_DONTWAIT);
141 			if ((n->m_flags & M_EXT) == 0) {
142 				m_free(n);
143 				m_freem(m0);
144 				return (NULL);
145 			}
146 		} else {
147 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
148 			if (n == NULL) {
149 				m_freem(m0);
150 				return (NULL);
151 			}
152 		}
153 		/*
154 		 * ... and copy the data.  We deal with jumbo mbufs
155 		 * (i.e. m_len > MCLBYTES) by splitting them into
156 		 * clusters.  We could just malloc a buffer and make
157 		 * it external but too many device drivers don't know
158 		 * how to break up the non-contiguous memory when
159 		 * doing DMA.
160 		 */
161 		len = m->m_len;
162 		off = 0;
163 		mfirst = n;
164 		mlast = NULL;
165 		for (;;) {
166 			int cc = min(len, MCLBYTES);
167 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
168 			n->m_len = cc;
169 			if (mlast != NULL)
170 				mlast->m_next = n;
171 			mlast = n;
172 			newipsecstat.ips_clcopied++;
173 
174 			len -= cc;
175 			if (len <= 0)
176 				break;
177 			off += cc;
178 
179 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
180 			if (n == NULL) {
181 				m_freem(mfirst);
182 				m_freem(m0);
183 				return (NULL);
184 			}
185 		}
186 		n->m_next = m->m_next;
187 		if (mprev == NULL)
188 			m0 = mfirst;		/* new head of chain */
189 		else
190 			mprev->m_next = mfirst;	/* replace old mbuf */
191 		m_free(m);			/* release old mbuf */
192 		mprev = mfirst;
193 	}
194 	return (m0);
195 }
196 
197 /*
198  * Make space for a new header of length hlen at skip bytes
199  * into the packet.  When doing this we allocate new mbufs only
200  * when absolutely necessary.  The mbuf where the new header
201  * is to go is returned together with an offset into the mbuf.
202  * If NULL is returned then the mbuf chain may have been modified;
203  * the caller is assumed to always free the chain.
204  */
205 struct mbuf *
206 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
207 {
208 	struct mbuf *m;
209 	unsigned remain;
210 
211 	KASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
212 	KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
213 
214 	for (m = m0; m && skip > m->m_len; m = m->m_next)
215 		skip -= m->m_len;
216 	if (m == NULL)
217 		return (NULL);
218 	/*
219 	 * At this point skip is the offset into the mbuf m
220 	 * where the new header should be placed.  Figure out
221 	 * if there's space to insert the new header.  If so,
222 	 * and copying the remainder makese sense then do so.
223 	 * Otherwise insert a new mbuf in the chain, splitting
224 	 * the contents of m as needed.
225 	 */
226 	remain = m->m_len - skip;		/* data to move */
227 	if (hlen > M_TRAILINGSPACE(m)) {
228 		struct mbuf *n;
229 
230 		/* XXX code doesn't handle clusters XXX */
231 		KASSERT(remain < MLEN,
232 			("m_makespace: remainder too big: %u", remain));
233 		/*
234 		 * Not enough space in m, split the contents
235 		 * of m, inserting new mbufs as required.
236 		 *
237 		 * NB: this ignores mbuf types.
238 		 */
239 		MGET(n, M_DONTWAIT, MT_DATA);
240 		if (n == NULL)
241 			return (NULL);
242 		n->m_next = m->m_next;		/* splice new mbuf */
243 		m->m_next = n;
244 		newipsecstat.ips_mbinserted++;
245 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
246 			/*
247 			 * New header fits in the old mbuf if we copy
248 			 * the remainder; just do the copy to the new
249 			 * mbuf and we're good to go.
250 			 */
251 			memcpy(mtod(n, caddr_t),
252 			       mtod(m, caddr_t) + skip, remain);
253 			n->m_len = remain;
254 			m->m_len = skip + hlen;
255 			*off = skip;
256 		} else {
257 			/*
258 			 * No space in the old mbuf for the new header.
259 			 * Make space in the new mbuf and check the
260 			 * remainder'd data fits too.  If not then we
261 			 * must allocate an additional mbuf (yech).
262 			 */
263 			n->m_len = 0;
264 			if (remain + hlen > M_TRAILINGSPACE(n)) {
265 				struct mbuf *n2;
266 
267 				MGET(n2, M_DONTWAIT, MT_DATA);
268 				/* NB: new mbuf is on chain, let caller free */
269 				if (n2 == NULL)
270 					return (NULL);
271 				n2->m_len = 0;
272 				memcpy(mtod(n2, caddr_t),
273 				       mtod(m, caddr_t) + skip, remain);
274 				n2->m_len = remain;
275 				/* splice in second mbuf */
276 				n2->m_next = n->m_next;
277 				n->m_next = n2;
278 				newipsecstat.ips_mbinserted++;
279 			} else {
280 				memcpy(mtod(n, caddr_t) + hlen,
281 				       mtod(m, caddr_t) + skip, remain);
282 				n->m_len += remain;
283 			}
284 			m->m_len -= remain;
285 			n->m_len += hlen;
286 			m = n;			/* header is at front ... */
287 			*off = 0;		/* ... of new mbuf */
288 		}
289 	} else {
290 		/*
291 		 * Copy the remainder to the back of the mbuf
292 		 * so there's space to write the new header.
293 		 */
294 		/* XXX can this be memcpy? does it handle overlap? */
295 		ovbcopy(mtod(m, caddr_t) + skip,
296 			mtod(m, caddr_t) + skip + hlen, remain);
297 		m->m_len += hlen;
298 		*off = skip;
299 	}
300 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
301 	return m;
302 }
303 
304 /*
305  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
306  * length is updated, and a pointer to the first byte of the padding
307  * (which is guaranteed to be all in one mbuf) is returned.
308  */
309 caddr_t
310 m_pad(struct mbuf *m, int n)
311 {
312 	register struct mbuf *m0, *m1;
313 	register int len, pad;
314 	caddr_t retval;
315 
316 	if (n <= 0) {  /* No stupid arguments. */
317 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
318 		m_freem(m);
319 		return NULL;
320 	}
321 
322 	len = m->m_pkthdr.len;
323 	pad = n;
324 	m0 = m;
325 
326 	while (m0->m_len < len) {
327 KASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
328 		len -= m0->m_len;
329 		m0 = m0->m_next;
330 	}
331 
332 	if (m0->m_len != len) {
333 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
334 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
335 
336 		m_freem(m);
337 		return NULL;
338 	}
339 
340 	/* Check for zero-length trailing mbufs, and find the last one. */
341 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
342 		if (m1->m_next->m_len != 0) {
343 			DPRINTF(("m_pad: length mismatch (should be %d "
344 			    "instead of %d)\n",
345 			    m->m_pkthdr.len,
346 			    m->m_pkthdr.len + m1->m_next->m_len));
347 
348 			m_freem(m);
349 			return NULL;
350 		}
351 
352 		m0 = m1->m_next;
353 	}
354 
355 	if (pad > M_TRAILINGSPACE(m0)) {
356 		/* Add an mbuf to the chain. */
357 		MGET(m1, M_DONTWAIT, MT_DATA);
358 		if (m1 == 0) {
359 			m_freem(m0);
360 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
361 			return NULL;
362 		}
363 
364 		m0->m_next = m1;
365 		m0 = m1;
366 		m0->m_len = 0;
367 	}
368 
369 	retval = m0->m_data + m0->m_len;
370 	m0->m_len += pad;
371 	m->m_pkthdr.len += pad;
372 
373 	return retval;
374 }
375 
376 /*
377  * Remove hlen data at offset skip in the packet.  This is used by
378  * the protocols strip protocol headers and associated data (e.g. IV,
379  * authenticator) on input.
380  */
381 int
382 m_striphdr(struct mbuf *m, int skip, int hlen)
383 {
384 	struct mbuf *m1;
385 	int roff;
386 
387 	/* Find beginning of header */
388 	m1 = m_getptr(m, skip, &roff);
389 	if (m1 == NULL)
390 		return (EINVAL);
391 
392 	/* Remove the header and associated data from the mbuf. */
393 	if (roff == 0) {
394 		/* The header was at the beginning of the mbuf */
395 		newipsecstat.ips_input_front++;
396 		m_adj(m1, hlen);
397 		if ((m1->m_flags & M_PKTHDR) == 0)
398 			m->m_pkthdr.len -= hlen;
399 	} else if (roff + hlen >= m1->m_len) {
400 		struct mbuf *mo;
401 
402 		/*
403 		 * Part or all of the header is at the end of this mbuf,
404 		 * so first let's remove the remainder of the header from
405 		 * the beginning of the remainder of the mbuf chain, if any.
406 		 */
407 		newipsecstat.ips_input_end++;
408 		if (roff + hlen > m1->m_len) {
409 			/* Adjust the next mbuf by the remainder */
410 			m_adj(m1->m_next, roff + hlen - m1->m_len);
411 
412 			/* The second mbuf is guaranteed not to have a pkthdr... */
413 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
414 		}
415 
416 		/* Now, let's unlink the mbuf chain for a second...*/
417 		mo = m1->m_next;
418 		m1->m_next = NULL;
419 
420 		/* ...and trim the end of the first part of the chain...sick */
421 		m_adj(m1, -(m1->m_len - roff));
422 		if ((m1->m_flags & M_PKTHDR) == 0)
423 			m->m_pkthdr.len -= (m1->m_len - roff);
424 
425 		/* Finally, let's relink */
426 		m1->m_next = mo;
427 	} else {
428 		/*
429 		 * The header lies in the "middle" of the mbuf; copy
430 		 * the remainder of the mbuf down over the header.
431 		 */
432 		newipsecstat.ips_input_middle++;
433 		bcopy(mtod(m1, u_char *) + roff + hlen,
434 		      mtod(m1, u_char *) + roff,
435 		      m1->m_len - (roff + hlen));
436 		m1->m_len -= hlen;
437 		m->m_pkthdr.len -= hlen;
438 	}
439 	return (0);
440 }
441 
442 /*
443  * Diagnostic routine to check mbuf alignment as required by the
444  * crypto device drivers (that use DMA).
445  */
446 void
447 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
448 {
449 	int roff;
450 	struct mbuf *m = m_getptr(m0, off, &roff);
451 	caddr_t addr;
452 
453 	if (m == NULL)
454 		return;
455 	printf("%s (off %u len %u): ", where, off, len);
456 	addr = mtod(m, caddr_t) + roff;
457 	do {
458 		int mlen;
459 
460 		if (((uintptr_t) addr) & 3) {
461 			printf("addr misaligned %p,", addr);
462 			break;
463 		}
464 		mlen = m->m_len;
465 		if (mlen > len)
466 			mlen = len;
467 		len -= mlen;
468 		if (len && (mlen & 3)) {
469 			printf("len mismatch %u,", mlen);
470 			break;
471 		}
472 		m = m->m_next;
473 		addr = m ? mtod(m, caddr_t) : NULL;
474 	} while (m && len > 0);
475 	for (m = m0; m; m = m->m_next)
476 		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
477 	printf("\n");
478 }
479