xref: /freebsd/sys/netipsec/ipsec_mbuf.c (revision 6780ab54325a71e7e70112b11657973edde8655e)
1 /*	$FreeBSD$	*/
2 
3 /*
4  * IPsec-specific mbuf routines.
5  */
6 
7 #include "opt_param.h"
8 
9 #include <sys/param.h>
10 #include <sys/systm.h>
11 #include <sys/mbuf.h>
12 #include <sys/socket.h>
13 
14 #include <net/route.h>
15 #include <netinet/in.h>
16 
17 #include <netipsec/ipsec.h>
18 
19 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
20 
21 /*
22  * Create a writable copy of the mbuf chain.  While doing this
23  * we compact the chain with a goal of producing a chain with
24  * at most two mbufs.  The second mbuf in this chain is likely
25  * to be a cluster.  The primary purpose of this work is to create
26  * a writable packet for encryption, compression, etc.  The
27  * secondary goal is to linearize the data so the data can be
28  * passed to crypto hardware in the most efficient manner possible.
29  */
30 struct mbuf *
31 m_clone(struct mbuf *m0)
32 {
33 	struct mbuf *m, *mprev;
34 	struct mbuf *n, *mfirst, *mlast;
35 	int len, off;
36 
37 	KASSERT(m0 != NULL, ("m_clone: null mbuf"));
38 
39 	mprev = NULL;
40 	for (m = m0; m != NULL; m = mprev->m_next) {
41 		/*
42 		 * Regular mbufs are ignored unless there's a cluster
43 		 * in front of them that we can use to coalesce.  We do
44 		 * the latter mainly so later clusters can be coalesced
45 		 * also w/o having to handle them specially (i.e. convert
46 		 * mbuf+cluster -> cluster).  This optimization is heavily
47 		 * influenced by the assumption that we're running over
48 		 * Ethernet where MCLBYTES is large enough that the max
49 		 * packet size will permit lots of coalescing into a
50 		 * single cluster.  This in turn permits efficient
51 		 * crypto operations, especially when using hardware.
52 		 */
53 		if ((m->m_flags & M_EXT) == 0) {
54 			if (mprev && (mprev->m_flags & M_EXT) &&
55 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
56 				/* XXX: this ignores mbuf types */
57 				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
58 				       mtod(m, caddr_t), m->m_len);
59 				mprev->m_len += m->m_len;
60 				mprev->m_next = m->m_next;	/* unlink from chain */
61 				m_free(m);			/* reclaim mbuf */
62 				newipsecstat.ips_mbcoalesced++;
63 			} else {
64 				mprev = m;
65 			}
66 			continue;
67 		}
68 		/*
69 		 * Writable mbufs are left alone (for now).
70 		 */
71 		if (!MEXT_IS_REF(m)) {
72 			mprev = m;
73 			continue;
74 		}
75 
76 		/*
77 		 * Not writable, replace with a copy or coalesce with
78 		 * the previous mbuf if possible (since we have to copy
79 		 * it anyway, we try to reduce the number of mbufs and
80 		 * clusters so that future work is easier).
81 		 */
82 		KASSERT(m->m_flags & M_EXT,
83 			("m_clone: m_flags 0x%x", m->m_flags));
84 		/* NB: we only coalesce into a cluster or larger */
85 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
86 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
87 			/* XXX: this ignores mbuf types */
88 			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
89 			       mtod(m, caddr_t), m->m_len);
90 			mprev->m_len += m->m_len;
91 			mprev->m_next = m->m_next;	/* unlink from chain */
92 			m_free(m);			/* reclaim mbuf */
93 			newipsecstat.ips_clcoalesced++;
94 			continue;
95 		}
96 
97 		/*
98 		 * Allocate new space to hold the copy...
99 		 */
100 		/* XXX why can M_PKTHDR be set past the first mbuf? */
101 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
102 			/*
103 			 * NB: if a packet header is present we must
104 			 * allocate the mbuf separately from any cluster
105 			 * because M_MOVE_PKTHDR will smash the data
106 			 * pointer and drop the M_EXT marker.
107 			 */
108 			MGETHDR(n, M_NOWAIT, m->m_type);
109 			if (n == NULL) {
110 				m_freem(m0);
111 				return (NULL);
112 			}
113 			M_MOVE_PKTHDR(n, m);
114 			MCLGET(n, M_NOWAIT);
115 			if ((n->m_flags & M_EXT) == 0) {
116 				m_free(n);
117 				m_freem(m0);
118 				return (NULL);
119 			}
120 		} else {
121 			n = m_getcl(M_NOWAIT, m->m_type, m->m_flags);
122 			if (n == NULL) {
123 				m_freem(m0);
124 				return (NULL);
125 			}
126 		}
127 		/*
128 		 * ... and copy the data.  We deal with jumbo mbufs
129 		 * (i.e. m_len > MCLBYTES) by splitting them into
130 		 * clusters.  We could just malloc a buffer and make
131 		 * it external but too many device drivers don't know
132 		 * how to break up the non-contiguous memory when
133 		 * doing DMA.
134 		 */
135 		len = m->m_len;
136 		off = 0;
137 		mfirst = n;
138 		mlast = NULL;
139 		for (;;) {
140 			int cc = min(len, MCLBYTES);
141 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
142 			n->m_len = cc;
143 			if (mlast != NULL)
144 				mlast->m_next = n;
145 			mlast = n;
146 			newipsecstat.ips_clcopied++;
147 
148 			len -= cc;
149 			if (len <= 0)
150 				break;
151 			off += cc;
152 
153 			n = m_getcl(M_NOWAIT, m->m_type, m->m_flags);
154 			if (n == NULL) {
155 				m_freem(mfirst);
156 				m_freem(m0);
157 				return (NULL);
158 			}
159 		}
160 		n->m_next = m->m_next;
161 		if (mprev == NULL)
162 			m0 = mfirst;		/* new head of chain */
163 		else
164 			mprev->m_next = mfirst;	/* replace old mbuf */
165 		m_free(m);			/* release old mbuf */
166 		mprev = mfirst;
167 	}
168 	return (m0);
169 }
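
/*
 * Example (illustrative sketch, not part of the numbered listing above):
 * a transform that must modify packet data in place would typically run
 * its chain through m_clone() first.  The wrapper name below is
 * hypothetical; only m_clone() itself is defined in this file.
 */
static struct mbuf *
example_make_writable(struct mbuf *m)
{
	/*
	 * m_clone() either returns a writable, compacted chain or frees
	 * the original chain and returns NULL, so the old pointer must
	 * not be used after this call.
	 */
	m = m_clone(m);
	if (m == NULL)
		return (NULL);		/* chain already freed by m_clone() */
	/* m is now safe to modify in place (encrypt, compress, ...). */
	return (m);
}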
170 
171 /*
172  * Make space for a new header of length hlen at skip bytes
173  * into the packet.  When doing this we allocate new mbufs only
174  * when absolutely necessary.  The mbuf where the new header
175  * is to go is returned together with an offset into the mbuf.
176  * If NULL is returned then the mbuf chain may have been modified;
177  * the caller is assumed to always free the chain.
178  */
179 struct mbuf *
180 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
181 {
182 	struct mbuf *m;
183 	unsigned remain;
184 
185 	KASSERT(m0 != NULL, ("m_makespace: null mbuf"));
186 	KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
187 
188 	for (m = m0; m && skip > m->m_len; m = m->m_next)
189 		skip -= m->m_len;
190 	if (m == NULL)
191 		return (NULL);
192 	/*
193 	 * At this point skip is the offset into the mbuf m
194 	 * where the new header should be placed.  Figure out
195 	 * if there's space to insert the new header.  If so,
196 	 * and copying the remainder makes sense, then do so.
197 	 * Otherwise insert a new mbuf in the chain, splitting
198 	 * the contents of m as needed.
199 	 */
200 	remain = m->m_len - skip;		/* data to move */
201 	if (hlen > M_TRAILINGSPACE(m)) {
202 		struct mbuf *n;
203 
204 		/* XXX code doesn't handle clusters XXX */
205 		KASSERT(remain < MLEN,
206 			("m_makespace: remainder too big: %u", remain));
207 		/*
208 		 * Not enough space in m, split the contents
209 		 * of m, inserting new mbufs as required.
210 		 *
211 		 * NB: this ignores mbuf types.
212 		 */
213 		MGET(n, M_NOWAIT, MT_DATA);
214 		if (n == NULL)
215 			return (NULL);
216 		n->m_next = m->m_next;		/* splice new mbuf */
217 		m->m_next = n;
218 		newipsecstat.ips_mbinserted++;
219 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
220 			/*
221 			 * New header fits in the old mbuf if we copy
222 			 * the remainder; just do the copy to the new
223 			 * mbuf and we're good to go.
224 			 */
225 			memcpy(mtod(n, caddr_t),
226 			       mtod(m, caddr_t) + skip, remain);
227 			n->m_len = remain;
228 			m->m_len = skip + hlen;
229 			*off = skip;
230 		} else {
231 			/*
232 			 * No space in the old mbuf for the new header.
233 			 * Make space in the new mbuf and check that the
234 			 * remaining data fits too.  If not then we
235 			 * must allocate an additional mbuf (yech).
236 			 */
237 			n->m_len = 0;
238 			if (remain + hlen > M_TRAILINGSPACE(n)) {
239 				struct mbuf *n2;
240 
241 				MGET(n2, M_NOWAIT, MT_DATA);
242 				/* NB: new mbuf is on chain, let caller free */
243 				if (n2 == NULL)
244 					return (NULL);
245 				n2->m_len = 0;
246 				memcpy(mtod(n2, caddr_t),
247 				       mtod(m, caddr_t) + skip, remain);
248 				n2->m_len = remain;
249 				/* splice in second mbuf */
250 				n2->m_next = n->m_next;
251 				n->m_next = n2;
252 				newipsecstat.ips_mbinserted++;
253 			} else {
254 				memcpy(mtod(n, caddr_t) + hlen,
255 				       mtod(m, caddr_t) + skip, remain);
256 				n->m_len += remain;
257 			}
258 			m->m_len -= remain;
259 			n->m_len += hlen;
260 			m = n;			/* header is at front ... */
261 			*off = 0;		/* ... of new mbuf */
262 		}
263 	} else {
264 		/*
265 		 * Copy the remainder to the back of the mbuf
266 		 * so there's space to write the new header.
267 		 */
268 		/* NB: the copy regions may overlap, so plain memcpy would be unsafe here */
269 		ovbcopy(mtod(m, caddr_t) + skip,
270 			mtod(m, caddr_t) + skip + hlen, remain);
271 		m->m_len += hlen;
272 		*off = skip;
273 	}
274 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
275 	return (m);
276 }
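
/*
 * Example (illustrative sketch, not part of the numbered listing above):
 * reserving hlen bytes for a new header immediately after an IP header
 * of length iphlen.  The wrapper name, hdrbuf and the error handling
 * policy are assumptions; only m_makespace() is defined in this file.
 */
static int
example_insert_header(struct mbuf *m0, int iphlen, int hlen,
    const void *hdrbuf)
{
	struct mbuf *m;
	int off;

	m = m_makespace(m0, iphlen, hlen, &off);
	if (m == NULL) {
		/* Chain may already be modified; the caller frees m0. */
		return (ENOBUFS);
	}
	/* The reserved space starts at offset off into mbuf m. */
	memcpy(mtod(m, caddr_t) + off, hdrbuf, hlen);
	return (0);
}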
277 
278 /*
279  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
280  * length is updated, and a pointer to the first byte of the padding
281  * (which is guaranteed to be all in one mbuf) is returned.
282  */
283 caddr_t
284 m_pad(struct mbuf *m, int n)
285 {
286 	register struct mbuf *m0, *m1;
287 	register int len, pad;
288 	caddr_t retval;
289 
290 	if (n <= 0) {  /* No stupid arguments. */
291 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
292 		m_freem(m);
293 		return NULL;
294 	}
295 
296 	len = m->m_pkthdr.len;
297 	pad = n;
298 	m0 = m;
299 
300 	while (m0->m_len < len) {
301 		KASSERT(m0->m_next != NULL, ("m_pad: chain too short, len %u m_len %u", len, m0->m_len)); /*XXX*/
302 		len -= m0->m_len;
303 		m0 = m0->m_next;
304 	}
305 
306 	if (m0->m_len != len) {
307 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
308 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
309 
310 		m_freem(m);
311 		return NULL;
312 	}
313 
314 	/* Check for zero-length trailing mbufs, and find the last one. */
315 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
316 		if (m1->m_next->m_len != 0) {
317 			DPRINTF(("m_pad: length mismatch (should be %d "
318 			    "instead of %d)\n",
319 			    m->m_pkthdr.len,
320 			    m->m_pkthdr.len + m1->m_next->m_len));
321 
322 			m_freem(m);
323 			return NULL;
324 		}
325 
326 		m0 = m1->m_next;
327 	}
328 
329 	if (pad > M_TRAILINGSPACE(m0)) {
330 		/* Add an mbuf to the chain. */
331 		MGET(m1, M_NOWAIT, MT_DATA);
332 		if (m1 == NULL) {
333 			m_freem(m);	/* free the whole chain, as on other error paths */
334 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
335 			return NULL;
336 		}
337 
338 		m0->m_next = m1;
339 		m0 = m1;
340 		m0->m_len = 0;
341 	}
342 
343 	retval = m0->m_data + m0->m_len;
344 	m0->m_len += pad;
345 	m->m_pkthdr.len += pad;
346 
347 	return retval;
348 }
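
/*
 * Example (illustrative sketch, not part of the numbered listing above):
 * appending padlen bytes of ESP-style trailer padding (the usual
 * 1, 2, 3, ... fill).  The wrapper name is hypothetical; only m_pad()
 * itself is defined in this file.
 */
static int
example_append_padding(struct mbuf *m, int padlen)
{
	u_char *pad;
	int i;

	/* NB: on failure m_pad() frees the chain and returns NULL. */
	pad = (u_char *)m_pad(m, padlen);
	if (pad == NULL)
		return (ENOBUFS);
	/* The padding is guaranteed to be contiguous in one mbuf. */
	for (i = 0; i < padlen; i++)
		pad[i] = i + 1;
	return (0);
}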
349 
350 /*
351  * Remove hlen data at offset skip in the packet.  This is used by
352  * the protocols to strip protocol headers and associated data (e.g. IV,
353  * authenticator) on input.
354  */
355 int
356 m_striphdr(struct mbuf *m, int skip, int hlen)
357 {
358 	struct mbuf *m1;
359 	int roff;
360 
361 	/* Find beginning of header */
362 	m1 = m_getptr(m, skip, &roff);
363 	if (m1 == NULL)
364 		return (EINVAL);
365 
366 	/* Remove the header and associated data from the mbuf. */
367 	if (roff == 0) {
368 		/* The header was at the beginning of the mbuf */
369 		newipsecstat.ips_input_front++;
370 		m_adj(m1, hlen);
371 		if ((m1->m_flags & M_PKTHDR) == 0)
372 			m->m_pkthdr.len -= hlen;
373 	} else if (roff + hlen >= m1->m_len) {
374 		struct mbuf *mo;
375 
376 		/*
377 		 * Part or all of the header is at the end of this mbuf,
378 		 * so first let's remove the remainder of the header from
379 		 * the beginning of the remainder of the mbuf chain, if any.
380 		 */
381 		newipsecstat.ips_input_end++;
382 		if (roff + hlen > m1->m_len) {
383 			/* Adjust the next mbuf by the remainder */
384 			m_adj(m1->m_next, roff + hlen - m1->m_len);
385 
386 			/* The second mbuf is guaranteed not to have a pkthdr... */
387 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
388 		}
389 
390 		/* Now, let's unlink the mbuf chain for a second...*/
391 		mo = m1->m_next;
392 		m1->m_next = NULL;
393 
394 		/* ...and trim the end of the first part of the chain...sick */
395 		m_adj(m1, -(m1->m_len - roff));
396 		if ((m1->m_flags & M_PKTHDR) == 0)
397 			m->m_pkthdr.len -= (m1->m_len - roff);
398 
399 		/* Finally, let's relink */
400 		m1->m_next = mo;
401 	} else {
402 		/*
403 		 * The header lies in the "middle" of the mbuf; copy
404 		 * the remainder of the mbuf down over the header.
405 		 */
406 		newipsecstat.ips_input_middle++;
407 		bcopy(mtod(m1, u_char *) + roff + hlen,
408 		      mtod(m1, u_char *) + roff,
409 		      m1->m_len - (roff + hlen));
410 		m1->m_len -= hlen;
411 		m->m_pkthdr.len -= hlen;
412 	}
413 	return (0);
414 }
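
/*
 * Example (illustrative sketch, not part of the numbered listing above):
 * dropping a protocol header plus its IV from an inbound packet before
 * handing the payload up.  The wrapper name and parameters are
 * assumptions; only m_striphdr() itself is defined in this file.
 */
static int
example_strip_input_header(struct mbuf *m, int skip, int hdrlen, int ivlen)
{
	int error;

	/* skip is the offset of the header within the packet. */
	error = m_striphdr(m, skip, hdrlen + ivlen);
	if (error)
		return (error);		/* offset past end of chain, etc. */
	/* m->m_pkthdr.len has been reduced by hdrlen + ivlen. */
	return (0);
}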
415 
416 /*
417  * Diagnostic routine to check mbuf alignment as required by the
418  * crypto device drivers (that use DMA).
419  */
420 void
421 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
422 {
423 	int roff;
424 	struct mbuf *m = m_getptr(m0, off, &roff);
425 	caddr_t addr;
426 
427 	if (m == NULL)
428 		return;
429 	printf("%s (off %u len %u): ", where, off, len);
430 	addr = mtod(m, caddr_t) + roff;
431 	do {
432 		int mlen;
433 
434 		if (((uintptr_t) addr) & 3) {
435 			printf("addr misaligned %p,", addr);
436 			break;
437 		}
438 		mlen = m->m_len;
439 		if (mlen > len)
440 			mlen = len;
441 		len -= mlen;
442 		if (len && (mlen & 3)) {
443 			printf("len mismatch %u,", mlen);
444 			break;
445 		}
446 		m = m->m_next;
447 		addr = m ? mtod(m, caddr_t) : NULL;
448 	} while (m && len > 0);
449 	for (m = m0; m; m = m->m_next)
450 		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
451 	printf("\n");
452 }
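
/*
 * Example (illustrative sketch, not part of the numbered listing above):
 * a debug path might sanity-check the region about to be handed to
 * crypto hardware.  The wrapper name and parameters are assumptions;
 * only m_checkalignment() itself is defined in this file.
 */
static void
example_check_before_dma(struct mbuf *m, int skip, int len)
{
	/* Prints a diagnostic line only; the chain is not modified. */
	m_checkalignment("example_check_before_dma", m, skip, len);
}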
453